table_sync 6.0.4 → 6.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,8 +4,11 @@ module TableSync::Publishing::Message
4
4
  class Raw
5
5
  include Tainbox
6
6
 
7
- attribute :object_class
7
+ attribute :model_name
8
+ attribute :table_name
9
+ attribute :schema_name
8
10
  attribute :original_attributes
11
+
9
12
  attribute :routing_key
10
13
  attribute :headers
11
14
 
@@ -21,18 +24,14 @@ module TableSync::Publishing::Message
21
24
 
22
25
  def notify!
23
26
  TableSync::Instrument.notify(
24
- table: model_naming.table,
25
- schema: model_naming.schema,
27
+ table: table_name,
28
+ schema: schema_name,
26
29
  event: event,
27
30
  count: original_attributes.count,
28
31
  direction: :publish,
29
32
  )
30
33
  end
31
34
 
32
- def model_naming
33
- TableSync.publishing_adapter.model_naming(object_class.constantize)
34
- end
35
-
36
35
  # MESSAGE PARAMS
37
36
 
38
37
  def message_params
@@ -41,13 +40,13 @@ module TableSync::Publishing::Message
41
40
 
42
41
  def data
43
42
  TableSync::Publishing::Data::Raw.new(
44
- object_class: object_class, attributes_for_sync: original_attributes, event: event,
43
+ model_name: model_name, attributes_for_sync: original_attributes, event: event,
45
44
  ).construct
46
45
  end
47
46
 
48
47
  def params
49
48
  TableSync::Publishing::Params::Raw.new(
50
- attributes.slice(:object_class, :headers, :routing_key).compact,
49
+ attributes.slice(:model_name, :headers, :routing_key).compact,
51
50
  ).construct
52
51
  end
53
52
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  module TableSync::Publishing::Params
4
4
  class Raw < Batch
5
- # FOR NAMING CONSISTENCY
5
+ attribute :model_name
6
+
7
+ alias_method :object_class, :model_name
6
8
  end
7
9
  end
@@ -3,7 +3,9 @@
3
3
  class TableSync::Publishing::Raw
4
4
  include Tainbox
5
5
 
6
- attribute :object_class
6
+ attribute :model_name
7
+ attribute :table_name
8
+ attribute :schema_name
7
9
  attribute :original_attributes
8
10
 
9
11
  attribute :routing_key
@@ -101,12 +101,12 @@ exactly_hash = proc do |block_for_keys, block_for_values|
101
101
  raise TableSync::WrongOptionValue.new(model, option_name, new_value)
102
102
  end
103
103
 
104
- new_value.to_a.map do |key, value|
104
+ new_value.to_a.to_h do |key, value|
105
105
  [
106
106
  instance_exec("#{option_name} keys", key, &block_for_keys),
107
107
  instance_exec("#{option_name} values", value, &block_for_values),
108
108
  ]
109
- end.to_h
109
+ end
110
110
  end
111
111
  end
112
112
 
@@ -77,7 +77,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
77
77
  end
78
78
 
79
79
  def processed_data(config)
80
- data.map do |row|
80
+ data.filter_map do |row|
81
81
  next if config.skip(row: row)
82
82
 
83
83
  row = row.dup
@@ -97,7 +97,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
97
97
  (row[rest_key] ||= {}).merge!(rest) if rest_key
98
98
 
99
99
  row
100
- end.compact
100
+ end
101
101
  end
102
102
 
103
103
  def validate_data(data, target_keys:)
@@ -115,7 +115,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
115
115
 
116
116
  keys_sample = data[0].keys
117
117
  keys_diff = data.each_with_object(Set.new) do |row, set|
118
- (row.keys - keys_sample | keys_sample - row.keys).each { |x| set.add(x) }
118
+ ((row.keys - keys_sample) | (keys_sample - row.keys)).each { |x| set.add(x) }
119
119
  end
120
120
 
121
121
  unless keys_diff.empty?
@@ -60,7 +60,7 @@ module TableSync::Receiving::Model
60
60
  end
61
61
 
62
62
  def upsert(data:, target_keys:, version_key:, default_values:)
63
- data.map do |datum|
63
+ data.filter_map do |datum|
64
64
  conditions = datum.select { |k| target_keys.include?(k) }
65
65
 
66
66
  row = raw_model.lock("FOR NO KEY UPDATE").where(conditions)
@@ -81,7 +81,7 @@ module TableSync::Receiving::Model
81
81
  end
82
82
 
83
83
  row_to_hash(row)
84
- end.compact
84
+ end
85
85
  end
86
86
 
87
87
  def destroy(data:, target_keys:, version_key:)
@@ -77,7 +77,7 @@ module TableSync::Receiving::Model
77
77
  end
78
78
 
79
79
  def update_spec(keys)
80
- keys.map { |key| [key, ::Sequel[:excluded][key]] }.to_h
80
+ keys.to_h { |key| [key, ::Sequel[:excluded][key]] }
81
81
  end
82
82
  end
83
83
  end
@@ -1,4 +1,4 @@
1
- # frozen-string_literal: true
1
+ # frozen_string_literal: true
2
2
 
3
3
  module TableSync::Setup
4
4
  class ActiveRecord < Base
@@ -1,4 +1,4 @@
1
- # frozen-string_literal: true
1
+ # frozen_string_literal: true
2
2
 
3
3
  module TableSync::Setup
4
4
  class Base
@@ -1,4 +1,4 @@
1
- # frozen-string_literal: true
1
+ # frozen_string_literal: true
2
2
 
3
3
  module TableSync::Setup
4
4
  class Sequel < Base
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module TableSync
4
- VERSION = "6.0.4"
4
+ VERSION = "6.1.0"
5
5
  end
data/lib/table_sync.rb CHANGED
@@ -37,12 +37,13 @@ module TableSync
37
37
  attr_accessor :routing_key_callable
38
38
  attr_accessor :exchange_name
39
39
  attr_accessor :headers_callable
40
- attr_accessor :notifier
40
+ attr_accessor :notify
41
41
 
42
42
  attr_reader :orm
43
43
  attr_reader :publishing_adapter
44
44
  attr_reader :receiving_model
45
45
  attr_reader :setup
46
+ attr_reader :notifier
46
47
 
47
48
  def sync(object_class, **options)
48
49
  setup.new(
@@ -70,5 +71,15 @@ module TableSync
70
71
 
71
72
  @orm = val
72
73
  end
74
+
75
+ def notifier=(value)
76
+ self.notify = true if notify.nil?
77
+
78
+ @notifier = value
79
+ end
80
+
81
+ def notify?
82
+ !!notify
83
+ end
73
84
  end
74
85
  end
data/table_sync.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "table_sync/version"
6
6
 
7
7
  Gem::Specification.new do |spec|
8
- spec.required_ruby_version = ">= 2.5.6"
8
+ spec.required_ruby_version = ">= 2.7.0"
9
9
 
10
10
  spec.name = "table_sync"
11
11
  spec.version = TableSync::VERSION
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: table_sync
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.4
4
+ version: 6.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Umbrellio
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-02-07 00:00:00.000000000 Z
11
+ date: 2023-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: memery
@@ -268,13 +268,6 @@ files:
268
268
  - Rakefile
269
269
  - bin/console
270
270
  - bin/setup
271
- - docs/message_protocol.md
272
- - docs/notifications.md
273
- - docs/publishing.md
274
- - docs/publishing/configuration.md
275
- - docs/publishing/manual.md
276
- - docs/publishing/publishers.md
277
- - docs/receiving.md
278
271
  - lib/table_sync.rb
279
272
  - lib/table_sync/errors.rb
280
273
  - lib/table_sync/event.rb
@@ -331,14 +324,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
331
324
  requirements:
332
325
  - - ">="
333
326
  - !ruby/object:Gem::Version
334
- version: 2.5.6
327
+ version: 2.7.0
335
328
  required_rubygems_version: !ruby/object:Gem::Requirement
336
329
  requirements:
337
330
  - - ">="
338
331
  - !ruby/object:Gem::Version
339
332
  version: '0'
340
333
  requirements: []
341
- rubygems_version: 3.3.0.dev
334
+ rubygems_version: 3.4.6
342
335
  signing_key:
343
336
  specification_version: 4
344
337
  summary: DB Table synchronization between microservices based on Model's event system
@@ -1,24 +0,0 @@
1
- # Messages protocol
2
-
3
- ```json
4
- {
5
- "project_id": "pid",
6
- "data": {
7
- "event": "update",
8
- "model": "User",
9
- "version": 1.23,
10
- "attributes": [
11
- {
12
- "id": "1",
13
- "name": "user1",
14
- "email": "user1@example.com"
15
- },
16
- {
17
- "id": "2",
18
- "name": "user2",
19
- "email": "user2@example.com"
20
- }
21
- ]
22
- }
23
- }
24
- ```
@@ -1,45 +0,0 @@
1
- ### Notifications
2
-
3
- #### ActiveSupport adapter
4
-
5
- You can use an already existing ActiveSupport adapter:
6
- ```ruby
7
- TableSync.notifier = TableSync::InstrumentAdapter::ActiveSupport
8
- ```
9
-
10
- This instrumentation API is provided by Active Support. It allows you to subscribe to notifications:
11
-
12
- ```ruby
13
- ActiveSupport::Notifications.subscribe(/tablesync/) do |name, start, finish, id, payload|
14
- # do something
15
- end
16
- ```
17
-
18
- Types of events available:
19
- `"tablesync.receive.update"`, `"tablesync.receive.destroy"`, `"tablesync.publish.update"`
20
- and `"tablesync.publish.destroy"`.
21
-
22
- You have access to the payload, which contains `event`, `direction`, `table`, `schema` and `count`.
23
-
24
- ```
25
- {
26
- :event => :update, # one of update / destroy
27
- :direction => :publish, # one of publish / receive
28
- :table => "users",
29
- :schema => "public",
30
- :count => 1
31
- }
32
- ```
33
-
34
- See more at https://guides.rubyonrails.org/active_support_instrumentation.html
35
-
36
-
37
- #### Custom adapters
38
-
39
- You can also create a custom adapter. It is expected to respond to the following method:
40
-
41
- ```ruby
42
- def notify(table:, event:, direction:, count:)
43
- # processes data about table_sync event
44
- end
45
- ```
@@ -1,143 +0,0 @@
1
- # Configuration
2
-
3
- Customization, configuration and other options.
4
-
5
- ## Model Customization
6
-
7
- There are methods you can define on a synched model to customize published messages for it.
8
-
9
- ### `#attributes_for_sync`
10
-
11
- Models can implement `#attributes_for_sync` to override which attributes are published for `update` and `create` events. If not present, all attributes are published.
12
-
13
- ### `#attributes_for_destroy`
14
-
15
- Models can implement `#attributes_for_destroy` to override which attributes are published for `destroy` events. If not present, `needle` (primary key) is published.
16
-
17
- ### `#attributes_for_routing_key`
18
-
19
- Models can implement `#attributes_for_routing_key` to override which attributes are given to the `routing_key_callable`. If not present, published attributes are given.
20
-
21
- ### `#attributes_for_headers`
22
-
23
- Models can implement `#attributes_for_headers` to override which attributes are given to the `headers_callable`. If not present, published attributes are given.
24
-
25
- ### `.table_sync_model_name`
26
-
27
- Models can implement `.table_sync_model_name` class method to override the model name used for publishing events. Default is a model class name.
28
-
29
- ## Callables
30
-
31
- Callables are defined once. TableSync will use them to dynamically resolve things like jobs, routing_key and headers.
32
-
33
- ### Single publishing job (required for automatic and delayed publishing)
34
-
35
- - `TableSync.single_publishing_job_class_callable` is a callable which should resolve to a class that calls TableSync back to actually publish changes.
36
-
37
- It is expected to have `.perform_at(hash_with_options)` and it will be passed a hash with the following keys:
38
-
39
- - `original_attributes` - serialized `original_attributes`
40
- - `object_class` - model name
41
- - `debounce_time` - pause between publishing messages
42
- - `event` - type of event that happened to synched entity
43
- - `perform_at` - time to perform the job at (depends on debounce)
44
-
45
- Example:
46
-
47
- ```ruby
48
- TableSync.single_publishing_job_class_callable = -> { TableSync::Job }
49
-
50
- class TableSync::Job < ActiveJob::Base
51
- def perform(jsoned_attributes)
52
- TableSync::Publishing::Single.new(
53
- JSON.parse(jsoned_attributes),
54
- ).publish_now
55
- end
56
-
57
- def self.perform_at(attributes)
58
- set(wait_until: attributes.delete(:perform_at))
59
- .perform_later(attributes.to_json)
60
- end
61
- end
62
-
63
- # will enqueue the job described above
64
-
65
- TableSync::Publishing::Single.new(
66
- object_class: "User",
67
- original_attributes: { id: 1, name: "Mark" }, # will be serialized!
68
- debounce_time: 60,
69
- event: :update,
70
- ).publish_later
71
- ```
72
-
73
- ### Batch publishing job (required only for `TableSync::Publishing::Batch#publish_later`)
74
-
75
- - `TableSync.batch_publishing_job_class_callable` is a callable which should resolve to a class that calls TableSync back to actually publish changes.
76
-
77
- It is expected to have `.perform_later(hash_with_options)` and it will be passed a hash with the following keys:
78
-
79
- - `original_attributes` - array of serialized `original_attributes`
80
- - `object_class` - model name
81
- - `event` - type of event that happened to synched entity
82
- - `routing_key` - custom routing_key (optional)
83
- - `headers` - custom headers (optional)
84
-
85
- More often than not this job is not very useful, since it makes more sense to use `#publish_now` from an already existing job that does a lot of things (not just publishing messages).
86
-
87
- ### Example
88
-
89
- ```ruby
90
- TableSync.batch_publishing_job_class_callable = -> { TableSync::BatchJob }
91
-
92
- class TableSync::BatchJob < ActiveJob::Base
93
- def perform(jsoned_attributes)
94
- TableSync::Publishing::Batch.new(
95
- JSON.parse(jsoned_attributes),
96
- ).publish_now
97
- end
98
-
99
- def self.perform_later(attributes)
100
- super(attributes.to_json)
101
- end
102
- end
103
-
104
- TableSync::Publishing::Batch.new(
105
- object_class: "User",
106
- original_attributes: [{ id: 1, name: "Mark" }, { id: 2, name: "Bob" }], # will be serialized!
107
- event: :create,
108
- routing_key: :custom_key, # optional
109
- headers: { type: "admin" }, # optional
110
- ).publish_later
111
- ```
112
-
113
- ### Routing key callable (required)
114
-
115
- - `TableSync.routing_key_callable` is a callable that resolves which routing key to use when publishing changes. It receives object class and published attributes or `#attributes_for_routing_key` (if defined).
116
-
117
- Example:
118
-
119
- ```ruby
120
- TableSync.routing_key_callable = -> (klass, attributes) { klass.gsub('::', '_').tableize }
121
- ```
122
-
123
- ### Headers callable (required)
124
-
125
- - `TableSync.headers_callable` is a callable that adds RabbitMQ headers which can be used in routing. It receives object class and published attributes or `#attributes_for_headers` (if defined).
126
-
127
- One possible way of using it is defining a headers exchange and routing rules based on key-value pairs (which correspond to sent headers).
128
-
129
- Example:
130
-
131
- ```ruby
132
- TableSync.headers_callable = -> (klass, attributes) { attributes.slice("project_id") }
133
- ```
134
-
135
- ## Other
136
-
137
- - `TableSync.exchange_name` defines the exchange name used for publishing (optional, falls back to default Rabbit gem configuration).
138
-
139
- - `TableSync.notifier` is a module that provides publish and receive notifications.
140
-
141
- - `TableSync.raise_on_empty_message` - raises an error on empty message if set to true.
142
-
143
- - `TableSync.orm` - set ORM (ActiveRecord or Sequel) used to process given entities. Required!
@@ -1,155 +0,0 @@
1
- # Manual Sync
2
-
3
- There are two ways you can manually publish large amounts of data.
4
-
5
- ### `TableSync::Publishing::Batch`
6
-
7
- Easier to use, but does a lot of DB queries. May filter out invalid PK values.
8
-
9
- #### Pros:
10
-
11
- - requires less work
12
- - it will automatically use methods for data customization (`#attributes_for_sync`, `#attributes_for_destroy`)
13
- - you can be sure that data you publish is valid (more or less)
14
- - serializes values for (`#publish_later`)
15
-
16
- #### Cons:
17
-
18
- - it queries database for each entity in batch (for `create` and `update`)
19
- - it may also do a lot of queries if `#attributes_for_sync` contains additional data from other connected entities
20
- - serializes values for (`#publish_later`); if your PK contains invalid values (ex. Date) they will be filtered out
21
-
22
- ### `TableSync::Publishing::Raw`
23
-
24
- More complex to use, but requires very few DB queries.
25
- You are responsible for the data you send!
26
-
27
- #### Pros:
28
-
29
- - very customizable; only requirement - `object_class` must exist
30
- - you can send whatever data you want
31
-
32
- #### Cons:
33
-
34
- - you have to manually prepare data for publishing
35
- - you have to be really sure you are sending valid data
36
-
37
- ## Tips
38
-
39
- - **Don't make data batches too large!**
40
-
41
- It may result in failure to process them. Good rule is to send approx. 5000 rows in one batch.
42
-
43
- - **Make pauses between publishing data batches!**
44
-
45
- Publishing without pause may overwhelm the receiving side. Either their background job processor (ex. Sidekiq) may clog with jobs, or their consumers may not be able to get messages from Rabbit server fast enough.
46
-
47
- 1 or 2 seconds is a good wait period.
48
-
49
- - **Do not use `TableSync::Publishing::Single` to send millions or even thousands of rows of data.**
50
-
51
- Or just calling update on rows with automatic sync.
52
- It WILL overwhelm the receiving side. Especially if they have some additional receiving logic.
53
-
54
- - **On the receiving side don't create job with custom logic (if it exists) for every row in a batch.**
55
-
56
- Better to process it whole. Otherwise three batches of 5000 will result in 15000 new jobs.
57
-
58
- - **Send one test batch before publishing the rest of the data.**
59
-
60
- Make sure it was received properly. This way you won't send a lot of invalid messages.
61
-
62
- - **Check the other arguments.**
63
-
64
- - Ensure the routing_key is correct if you are using a custom one. Remember, that batch publishing ignores `#attributes_for_routing_key` on a model.
65
- - Ensure that `object_class` is correct. And it belongs to entities you want to send.
66
- - Ensure that you chose the correct event.
67
- - If you have some logic depending on headers, ensure they are also correct. Remember, that batch publishing ignores `#attributes_for_headers` on a model.
68
-
69
- - **You can check what you send before publishing with:**
70
-
71
- ```ruby
72
- TableSync::Publishing::Batch.new(...).message.message_params
73
-
74
- TableSync::Publishing::Raw.new(...).message.message_params
75
- ```
76
-
77
- ## Examples
78
-
79
- ### `TableSync::Publishing::Raw`
80
-
81
- ```ruby
82
- # For Sequel
83
- # gem 'sequel-batches' or equivalent that will allow you to chunk data somehow
84
- # or #each_page from Sequel
85
-
86
- # this is a simple query
87
- # they can be much more complex, with joins and other things
88
- # just make sure that it results in a set of data you expect
89
- data = User.in_batches(of: 5000).naked.select(:id, :name, :email)
90
-
91
- data.each_with_index do |batch, i|
92
- TableSync::Publishing::Raw.new(
93
- object_class: "User",
94
- original_attributes: batch,
95
- event: :create,
96
- routing_key: :custom_key, # optional
97
- headers: { type: "admin" }, # optional
98
- ).publish_now
99
-
100
- # make a pause between batches
101
- sleep 1
102
-
103
- # for when you are sending from terminal
104
- # allows you to keep an eye on progress
105
- # you can create more complex output
106
- puts "Batch #{i} sent!"
107
- end
108
-
109
- ```
110
-
111
- #### Another way to gather data
112
-
113
- If you don't want to create a data query (maybe it's too complex) but there is a lot of querying in `#attributes_for_sync` and you are willing to trade a little bit of performance, you can try the following.
114
-
115
- ```ruby
116
- class User < Sequel
117
- one_to_many :user_info
118
-
119
- # For example our receiving side wants to know the ips user logged in under
120
- # But doesn't want to sync the user_info
121
- def attributes_for_sync
122
- attributes.merge(
123
- ips: user_info.ips
124
- )
125
- end
126
- end
127
-
128
- # to prevent the need to query for every piece of additional data we can use eager loading
129
- # and construct published data by calling #attributes_for_sync
130
- # don't forget to chunk it into more manageable sizes before trying to send
131
- data = User.eager(:statuses).map { |user| user.attributes_for_sync }
132
- ```
133
- This way it will not make unnecessary queries.
134
-
135
- ### `TableSync::Publishing::Batch`
136
-
137
- Remember, it will query or initialize each row.
138
-
139
- ```ruby
140
- # You can just send ids.
141
- data = User.in_batches(of: 5000).naked.select(:id)
142
-
143
- data.each_with_index do |data, i|
144
- TableSync::Publishing::Batch.new(
145
- object_class: "User",
146
- original_attributes: data,
147
- event: :create,
148
- routing_key: :custom_key, # optional
149
- headers: { type: "admin" }, # optional
150
- ).publish_now
151
-
152
- sleep 1
153
- puts "Batch #{i} sent!"
154
- end
155
- ```