table_sync 6.0.4 → 6.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +2 -2
- data/.rubocop.yml +12 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +152 -138
- data/README.md +1 -7
- data/lib/table_sync/errors.rb +3 -2
- data/lib/table_sync/instrument.rb +24 -2
- data/lib/table_sync/orm_adapter/active_record.rb +4 -4
- data/lib/table_sync/orm_adapter/base.rb +6 -4
- data/lib/table_sync/orm_adapter/sequel.rb +6 -6
- data/lib/table_sync/publishing/data/raw.rb +9 -5
- data/lib/table_sync/publishing/message/raw.rb +8 -9
- data/lib/table_sync/publishing/params/raw.rb +3 -1
- data/lib/table_sync/publishing/raw.rb +3 -1
- data/lib/table_sync/receiving/config.rb +2 -2
- data/lib/table_sync/receiving/handler.rb +3 -3
- data/lib/table_sync/receiving/model/active_record.rb +2 -2
- data/lib/table_sync/receiving/model/sequel.rb +1 -1
- data/lib/table_sync/setup/active_record.rb +1 -1
- data/lib/table_sync/setup/base.rb +1 -1
- data/lib/table_sync/setup/sequel.rb +1 -1
- data/lib/table_sync/version.rb +1 -1
- data/lib/table_sync.rb +12 -1
- data/table_sync.gemspec +1 -1
- metadata +4 -11
- data/docs/message_protocol.md +0 -24
- data/docs/notifications.md +0 -45
- data/docs/publishing/configuration.md +0 -143
- data/docs/publishing/manual.md +0 -155
- data/docs/publishing/publishers.md +0 -162
- data/docs/publishing.md +0 -80
- data/docs/receiving.md +0 -346
@@ -4,8 +4,11 @@ module TableSync::Publishing::Message
|
|
4
4
|
class Raw
|
5
5
|
include Tainbox
|
6
6
|
|
7
|
-
attribute :
|
7
|
+
attribute :model_name
|
8
|
+
attribute :table_name
|
9
|
+
attribute :schema_name
|
8
10
|
attribute :original_attributes
|
11
|
+
|
9
12
|
attribute :routing_key
|
10
13
|
attribute :headers
|
11
14
|
|
@@ -21,18 +24,14 @@ module TableSync::Publishing::Message
|
|
21
24
|
|
22
25
|
def notify!
|
23
26
|
TableSync::Instrument.notify(
|
24
|
-
table:
|
25
|
-
schema:
|
27
|
+
table: table_name,
|
28
|
+
schema: schema_name,
|
26
29
|
event: event,
|
27
30
|
count: original_attributes.count,
|
28
31
|
direction: :publish,
|
29
32
|
)
|
30
33
|
end
|
31
34
|
|
32
|
-
def model_naming
|
33
|
-
TableSync.publishing_adapter.model_naming(object_class.constantize)
|
34
|
-
end
|
35
|
-
|
36
35
|
# MESSAGE PARAMS
|
37
36
|
|
38
37
|
def message_params
|
@@ -41,13 +40,13 @@ module TableSync::Publishing::Message
|
|
41
40
|
|
42
41
|
def data
|
43
42
|
TableSync::Publishing::Data::Raw.new(
|
44
|
-
|
43
|
+
model_name: model_name, attributes_for_sync: original_attributes, event: event,
|
45
44
|
).construct
|
46
45
|
end
|
47
46
|
|
48
47
|
def params
|
49
48
|
TableSync::Publishing::Params::Raw.new(
|
50
|
-
attributes.slice(:
|
49
|
+
attributes.slice(:model_name, :headers, :routing_key).compact,
|
51
50
|
).construct
|
52
51
|
end
|
53
52
|
end
|
@@ -101,12 +101,12 @@ exactly_hash = proc do |block_for_keys, block_for_values|
|
|
101
101
|
raise TableSync::WrongOptionValue.new(model, option_name, new_value)
|
102
102
|
end
|
103
103
|
|
104
|
-
new_value.to_a.
|
104
|
+
new_value.to_a.to_h do |key, value|
|
105
105
|
[
|
106
106
|
instance_exec("#{option_name} keys", key, &block_for_keys),
|
107
107
|
instance_exec("#{option_name} values", value, &block_for_values),
|
108
108
|
]
|
109
|
-
end
|
109
|
+
end
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
@@ -77,7 +77,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def processed_data(config)
|
80
|
-
data.
|
80
|
+
data.filter_map do |row|
|
81
81
|
next if config.skip(row: row)
|
82
82
|
|
83
83
|
row = row.dup
|
@@ -97,7 +97,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
|
|
97
97
|
(row[rest_key] ||= {}).merge!(rest) if rest_key
|
98
98
|
|
99
99
|
row
|
100
|
-
end
|
100
|
+
end
|
101
101
|
end
|
102
102
|
|
103
103
|
def validate_data(data, target_keys:)
|
@@ -115,7 +115,7 @@ class TableSync::Receiving::Handler < Rabbit::EventHandler
|
|
115
115
|
|
116
116
|
keys_sample = data[0].keys
|
117
117
|
keys_diff = data.each_with_object(Set.new) do |row, set|
|
118
|
-
(row.keys - keys_sample | keys_sample - row.keys).each { |x| set.add(x) }
|
118
|
+
((row.keys - keys_sample) | (keys_sample - row.keys)).each { |x| set.add(x) }
|
119
119
|
end
|
120
120
|
|
121
121
|
unless keys_diff.empty?
|
@@ -60,7 +60,7 @@ module TableSync::Receiving::Model
|
|
60
60
|
end
|
61
61
|
|
62
62
|
def upsert(data:, target_keys:, version_key:, default_values:)
|
63
|
-
data.
|
63
|
+
data.filter_map do |datum|
|
64
64
|
conditions = datum.select { |k| target_keys.include?(k) }
|
65
65
|
|
66
66
|
row = raw_model.lock("FOR NO KEY UPDATE").where(conditions)
|
@@ -81,7 +81,7 @@ module TableSync::Receiving::Model
|
|
81
81
|
end
|
82
82
|
|
83
83
|
row_to_hash(row)
|
84
|
-
end
|
84
|
+
end
|
85
85
|
end
|
86
86
|
|
87
87
|
def destroy(data:, target_keys:, version_key:)
|
data/lib/table_sync/version.rb
CHANGED
data/lib/table_sync.rb
CHANGED
@@ -37,12 +37,13 @@ module TableSync
|
|
37
37
|
attr_accessor :routing_key_callable
|
38
38
|
attr_accessor :exchange_name
|
39
39
|
attr_accessor :headers_callable
|
40
|
-
attr_accessor :
|
40
|
+
attr_accessor :notify
|
41
41
|
|
42
42
|
attr_reader :orm
|
43
43
|
attr_reader :publishing_adapter
|
44
44
|
attr_reader :receiving_model
|
45
45
|
attr_reader :setup
|
46
|
+
attr_reader :notifier
|
46
47
|
|
47
48
|
def sync(object_class, **options)
|
48
49
|
setup.new(
|
@@ -70,5 +71,15 @@ module TableSync
|
|
70
71
|
|
71
72
|
@orm = val
|
72
73
|
end
|
74
|
+
|
75
|
+
def notifier=(value)
|
76
|
+
self.notify = true if notify.nil?
|
77
|
+
|
78
|
+
@notifier = value
|
79
|
+
end
|
80
|
+
|
81
|
+
def notify?
|
82
|
+
!!notify
|
83
|
+
end
|
73
84
|
end
|
74
85
|
end
|
data/table_sync.gemspec
CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
5
|
require "table_sync/version"
|
6
6
|
|
7
7
|
Gem::Specification.new do |spec|
|
8
|
-
spec.required_ruby_version = ">= 2.
|
8
|
+
spec.required_ruby_version = ">= 2.7.0"
|
9
9
|
|
10
10
|
spec.name = "table_sync"
|
11
11
|
spec.version = TableSync::VERSION
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: table_sync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.0
|
4
|
+
version: 6.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Umbrellio
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: memery
|
@@ -268,13 +268,6 @@ files:
|
|
268
268
|
- Rakefile
|
269
269
|
- bin/console
|
270
270
|
- bin/setup
|
271
|
-
- docs/message_protocol.md
|
272
|
-
- docs/notifications.md
|
273
|
-
- docs/publishing.md
|
274
|
-
- docs/publishing/configuration.md
|
275
|
-
- docs/publishing/manual.md
|
276
|
-
- docs/publishing/publishers.md
|
277
|
-
- docs/receiving.md
|
278
271
|
- lib/table_sync.rb
|
279
272
|
- lib/table_sync/errors.rb
|
280
273
|
- lib/table_sync/event.rb
|
@@ -331,14 +324,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
331
324
|
requirements:
|
332
325
|
- - ">="
|
333
326
|
- !ruby/object:Gem::Version
|
334
|
-
version: 2.
|
327
|
+
version: 2.7.0
|
335
328
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
336
329
|
requirements:
|
337
330
|
- - ">="
|
338
331
|
- !ruby/object:Gem::Version
|
339
332
|
version: '0'
|
340
333
|
requirements: []
|
341
|
-
rubygems_version: 3.
|
334
|
+
rubygems_version: 3.4.6
|
342
335
|
signing_key:
|
343
336
|
specification_version: 4
|
344
337
|
summary: DB Table synchronization between microservices based on Model's event system
|
data/docs/message_protocol.md
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
# Messages protocol
|
2
|
-
|
3
|
-
```json
|
4
|
-
{
|
5
|
-
"project_id": "pid",
|
6
|
-
"data": {
|
7
|
-
"event": "update",
|
8
|
-
"model": "User",
|
9
|
-
"version": 1.23,
|
10
|
-
"attributes": [
|
11
|
-
{
|
12
|
-
"id": "1",
|
13
|
-
"name": "user1",
|
14
|
-
"email": "user1@example.com"
|
15
|
-
},
|
16
|
-
{
|
17
|
-
"id": "2",
|
18
|
-
"name": "user2",
|
19
|
-
"email": "user2@example.com"
|
20
|
-
}
|
21
|
-
]
|
22
|
-
}
|
23
|
-
}
|
24
|
-
```
|
data/docs/notifications.md
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
### Notifications
|
2
|
-
|
3
|
-
#### ActiveSupport adapter
|
4
|
-
|
5
|
-
You can use an already existing ActiveSupport adapter:
|
6
|
-
```ruby
|
7
|
-
TableSync.notifier = TableSync::InstrumentAdapter::ActiveSupport
|
8
|
-
```
|
9
|
-
|
10
|
-
This instrumentation API is provided by Active Support. It allows you to subscribe to notifications:
|
11
|
-
|
12
|
-
```ruby
|
13
|
-
ActiveSupport::Notifications.subscribe(/tablesync/) do |name, start, finish, id, payload|
|
14
|
-
# do something
|
15
|
-
end
|
16
|
-
```
|
17
|
-
|
18
|
-
Types of events available:
|
19
|
-
`"tablesync.receive.update"`, `"tablesync.receive.destroy"`, `"tablesync.publish.update"`
|
20
|
-
and `"tablesync.publish.destroy"`.
|
21
|
-
|
22
|
-
You have access to the payload, which contains `event`, `direction`, `table`, `schema` and `count`.
|
23
|
-
|
24
|
-
```
|
25
|
-
{
|
26
|
-
:event => :update, # one of update / destroy
|
27
|
-
:direction => :publish, # one of publish / receive
|
28
|
-
:table => "users",
|
29
|
-
:schema => "public",
|
30
|
-
:count => 1
|
31
|
-
}
|
32
|
-
```
|
33
|
-
|
34
|
-
See more at https://guides.rubyonrails.org/active_support_instrumentation.html
|
35
|
-
|
36
|
-
|
37
|
-
#### Custom adapters
|
38
|
-
|
39
|
-
You can also create a custom adapter. It is expected to respond to the following method:
|
40
|
-
|
41
|
-
```ruby
|
42
|
-
def notify(table:, event:, direction:, count:)
|
43
|
-
# processes data about table_sync event
|
44
|
-
end
|
45
|
-
```
|
@@ -1,143 +0,0 @@
|
|
1
|
-
# Configuration
|
2
|
-
|
3
|
-
Customization, configuration and other options.
|
4
|
-
|
5
|
-
## Model Customization
|
6
|
-
|
7
|
-
There are methods you can define on a synched model to customize published messages for it.
|
8
|
-
|
9
|
-
### `#attributes_for_sync`
|
10
|
-
|
11
|
-
Models can implement `#attributes_for_sync` to override which attributes are published for `update` and `create` events. If not present, all attributes are published.
|
12
|
-
|
13
|
-
### `#attributes_for_destroy`
|
14
|
-
|
15
|
-
Models can implement `#attributes_for_destroy` to override which attributes are published for `destroy` events. If not present, `needle` (primary key) is published.
|
16
|
-
|
17
|
-
### `#attributes_for_routing_key`
|
18
|
-
|
19
|
-
Models can implement `#attributes_for_routing_key` to override which attributes are given to the `routing_key_callable`. If not present, published attributes are given.
|
20
|
-
|
21
|
-
### `#attributes_for_headers`
|
22
|
-
|
23
|
-
Models can implement `#attributes_for_headers` to override which attributes are given to the `headers_callable`. If not present, published attributes are given.
|
24
|
-
|
25
|
-
### `.table_sync_model_name`
|
26
|
-
|
27
|
-
Models can implement `.table_sync_model_name` class method to override the model name used for publishing events. Default is a model class name.
|
28
|
-
|
29
|
-
## Callables
|
30
|
-
|
31
|
-
Callables are defined once. TableSync will use them to dynamically resolve things like jobs, routing_key and headers.
|
32
|
-
|
33
|
-
### Single publishing job (required for automatic and delayed publishing)
|
34
|
-
|
35
|
-
- `TableSync.single_publishing_job_class_callable` is a callable which should resolve to a class that calls TableSync back to actually publish changes.
|
36
|
-
|
37
|
-
It is expected to have `.perform_at(hash_with_options)` and it will be passed a hash with the following keys:
|
38
|
-
|
39
|
-
- `original_attributes` - serialized `original_attributes`
|
40
|
-
- `object_class` - model name
|
41
|
-
- `debounce_time` - pause between publishing messages
|
42
|
-
- `event` - type of event that happened to synched entity
|
43
|
-
- `perform_at` - time to perform the job at (depends on debounce)
|
44
|
-
|
45
|
-
Example:
|
46
|
-
|
47
|
-
```ruby
|
48
|
-
TableSync.single_publishing_job_class_callable = -> { TableSync::Job }
|
49
|
-
|
50
|
-
class TableSync::Job < ActiveJob::Base
|
51
|
-
def perform(jsoned_attributes)
|
52
|
-
TableSync::Publishing::Single.new(
|
53
|
-
JSON.parse(jsoned_attributes),
|
54
|
-
).publish_now
|
55
|
-
end
|
56
|
-
|
57
|
-
def self.perform_at(attributes)
|
58
|
-
set(wait_until: attributes.delete(:perform_at))
|
59
|
-
.perform_later(attributes.to_json)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
# will enqueue the job described above
|
64
|
-
|
65
|
-
TableSync::Publishing::Single.new(
|
66
|
-
object_class: "User",
|
67
|
-
original_attributes: { id: 1, name: "Mark" }, # will be serialized!
|
68
|
-
debounce_time: 60,
|
69
|
-
event: :update,
|
70
|
-
).publish_later
|
71
|
-
```
|
72
|
-
|
73
|
-
### Batch publishing job (required only for `TableSync::Publishing::Batch#publish_later`)
|
74
|
-
|
75
|
-
- `TableSync.batch_publishing_job_class_callable` is a callable which should resolve to a class that calls TableSync back to actually publish changes.
|
76
|
-
|
77
|
-
It is expected to have `.perform_later(hash_with_options)` and it will be passed a hash with the following keys:
|
78
|
-
|
79
|
-
- `original_attributes` - array of serialized `original_attributes`
|
80
|
-
- `object_class` - model name
|
81
|
-
- `event` - type of event that happened to synched entity
|
82
|
-
- `routing_key` - custom routing_key (optional)
|
83
|
-
- `headers` - custom headers (optional)
|
84
|
-
|
85
|
-
More often than not this job is not very useful, since it makes more sense to use `#publish_now` from an already existing job that does a lot of things (not just publishing messages).
|
86
|
-
|
87
|
-
### Example
|
88
|
-
|
89
|
-
```ruby
|
90
|
-
TableSync.batch_publishing_job_class_callable = -> { TableSync::BatchJob }
|
91
|
-
|
92
|
-
class TableSync::BatchJob < ActiveJob::Base
|
93
|
-
def perform(jsoned_attributes)
|
94
|
-
TableSync::Publishing::Batch.new(
|
95
|
-
JSON.parse(jsoned_attributes),
|
96
|
-
).publish_now
|
97
|
-
end
|
98
|
-
|
99
|
-
def self.perform_later(attributes)
|
100
|
-
super(attributes.to_json)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
TableSync::Publishing::Batch.new(
|
105
|
-
object_class: "User",
|
106
|
-
original_attributes: [{ id: 1, name: "Mark" }, { id: 2, name: "Bob" }], # will be serialized!
|
107
|
-
event: :create,
|
108
|
-
routing_key: :custom_key, # optional
|
109
|
-
headers: { type: "admin" }, # optional
|
110
|
-
).publish_later
|
111
|
-
```
|
112
|
-
|
113
|
-
### Routing key callable (required)
|
114
|
-
|
115
|
-
- `TableSync.routing_key_callable` is a callable that resolves which routing key to use when publishing changes. It receives object class and published attributes or `#attributes_for_routing_key` (if defined).
|
116
|
-
|
117
|
-
Example:
|
118
|
-
|
119
|
-
```ruby
|
120
|
-
TableSync.routing_key_callable = -> (klass, attributes) { klass.gsub('::', '_').tableize }
|
121
|
-
```
|
122
|
-
|
123
|
-
### Headers callable (required)
|
124
|
-
|
125
|
-
- `TableSync.headers_callable` is a callable that adds RabbitMQ headers which can be used in routing. It receives object class and published attributes or `#attributes_for_headers` (if defined).
|
126
|
-
|
127
|
-
One possible way of using it is defining a headers exchange and routing rules based on key-value pairs (which correspond to sent headers).
|
128
|
-
|
129
|
-
Example:
|
130
|
-
|
131
|
-
```ruby
|
132
|
-
TableSync.headers_callable = -> (klass, attributes) { attributes.slice("project_id") }
|
133
|
-
```
|
134
|
-
|
135
|
-
## Other
|
136
|
-
|
137
|
-
- `TableSync.exchange_name` defines the exchange name used for publishing (optional, falls back to default Rabbit gem configuration).
|
138
|
-
|
139
|
-
- `TableSync.notifier` is a module that provides publish and receive notifications.
|
140
|
-
|
141
|
-
- `TableSync.raise_on_empty_message` - raises an error on empty message if set to true.
|
142
|
-
|
143
|
-
- `TableSync.orm` - set ORM (ActiveRecord or Sequel) used to process given entities. Required!
|
data/docs/publishing/manual.md
DELETED
@@ -1,155 +0,0 @@
|
|
1
|
-
# Manual Sync
|
2
|
-
|
3
|
-
There are two ways you can manually publish large amounts of data.
|
4
|
-
|
5
|
-
### `TableSync::Publishing::Batch`
|
6
|
-
|
7
|
-
Easier to use, but does a lot of DB queries. May filter out invalid PK values.
|
8
|
-
|
9
|
-
#### Pros:
|
10
|
-
|
11
|
-
- requires less work
|
12
|
-
- it will automatically use methods for data customization (`#attributes_for_sync`, `#attributes_for_destroy`)
|
13
|
-
- you can be sure that data you publish is valid (more or less)
|
14
|
-
- serializes values for (`#publish_later`)
|
15
|
-
|
16
|
-
#### Cons:
|
17
|
-
|
18
|
-
- it queries database for each entity in batch (for `create` and `update`)
|
19
|
-
- it may also do a lot of queries if `#attributes_for_sync` contains additional data from other connected entities
|
20
|
-
- serializes values for (`#publish_later`); if your PK contains invalid values (ex. Date) they will be filtered out
|
21
|
-
|
22
|
-
### `TableSync::Publishing::Raw`
|
23
|
-
|
24
|
-
More complex to use, but requires very few DB queries.
|
25
|
-
You are responsible for the data you send!
|
26
|
-
|
27
|
-
#### Pros:
|
28
|
-
|
29
|
-
- very customizable; only requirement - `object_class` must exist
|
30
|
-
- you can send whatever data you want
|
31
|
-
|
32
|
-
#### Cons:
|
33
|
-
|
34
|
-
- you have to manually prepare data for publishing
|
35
|
-
- you have to be really sure you are sending valid data
|
36
|
-
|
37
|
-
## Tips
|
38
|
-
|
39
|
-
- **Don't make data batches too large!**
|
40
|
-
|
41
|
-
It may result in failure to process them. Good rule is to send approx. 5000 rows in one batch.
|
42
|
-
|
43
|
-
- **Make pauses between publishing data batches!**
|
44
|
-
|
45
|
-
Publishing without pause may overwhelm the receiving side. Either their background job processor (ex. Sidekiq) may clog with jobs, or their consumers may not be able to get messages from Rabbit server fast enough.
|
46
|
-
|
47
|
-
1 or 2 seconds is a good wait period.
|
48
|
-
|
49
|
-
- **Do not use `TableSync::Publishing::Single` to send millions or even thousands of rows of data.**
|
50
|
-
|
51
|
-
Or just calling update on rows with automatic sync.
|
52
|
-
It WILL overwhelm the receiving side. Especially if they have some additional receiving logic.
|
53
|
-
|
54
|
-
- **On the receiving side don't create job with custom logic (if it exists) for every row in a batch.**
|
55
|
-
|
56
|
-
Better to process it whole. Otherwise three batches of 5000 will result in 15000 new jobs.
|
57
|
-
|
58
|
-
- **Send one test batch before publishing the rest of the data.**
|
59
|
-
|
60
|
-
Make sure it was received properly. This way you won't send a lot of invalid messages.
|
61
|
-
|
62
|
-
- **Check the other arguments.**
|
63
|
-
|
64
|
-
- Ensure the routing_key is correct if you are using a custom one. Remember, that batch publishing ignores `#attributes_for_routing_key` on a model.
|
65
|
-
- Ensure that `object_class` is correct. And it belongs to entities you want to send.
|
66
|
-
- Ensure that you chose the correct event.
|
67
|
-
- If you have some logic depending on headers, ensure they are also correct. Remember, that batch publishing ignores `#attributes_for_headers` on a model.
|
68
|
-
|
69
|
-
- **You can check what you send before publishing with:**
|
70
|
-
|
71
|
-
```ruby
|
72
|
-
TableSync::Publishing:Batch.new(...).message.message_params
|
73
|
-
|
74
|
-
TableSync::Publishing:Raw.new(...).message.message_params
|
75
|
-
```
|
76
|
-
|
77
|
-
## Examples
|
78
|
-
|
79
|
-
### `TableSync::Publishing::Raw`
|
80
|
-
|
81
|
-
```ruby
|
82
|
-
# For Sequel
|
83
|
-
# gem 'sequel-batches' or equivalent that will allow you to chunk data somehow
|
84
|
-
# or #each_page from Sequel
|
85
|
-
|
86
|
-
# this is a simple query
|
87
|
-
# they can be much more complex, with joins and other things
|
88
|
-
# just make sure that it results in a set of data you expect
|
89
|
-
data = User.in_batches(of: 5000).naked.select(:id, :name, :email)
|
90
|
-
|
91
|
-
data.each_with_index do |batch, i|
|
92
|
-
TableSync::Publishing::Raw.new(
|
93
|
-
object_class: "User",
|
94
|
-
original_attributes: batch,
|
95
|
-
event: :create,
|
96
|
-
routing_key: :custom_key, # optional
|
97
|
-
headers: { type: "admin" }, # optional
|
98
|
-
).publish_now
|
99
|
-
|
100
|
-
# make a pause between batches
|
101
|
-
sleep 1
|
102
|
-
|
103
|
-
# for when you are sending from terminal
|
104
|
-
# allows you to keep an eye on progress
|
105
|
-
# you can create more complex output
|
106
|
-
puts "Batch #{i} sent!"
|
107
|
-
end
|
108
|
-
|
109
|
-
```
|
110
|
-
|
111
|
-
#### Another way to gather data
|
112
|
-
|
113
|
-
If you don't want to create a data query (maybe it's too complex) but there is a lot of querying in `#attributes_for_sync` and you are willing to trade a little bit of performance, you can try the following.
|
114
|
-
|
115
|
-
```ruby
|
116
|
-
class User < Sequel
|
117
|
-
one_to_many :user_info
|
118
|
-
|
119
|
-
# For example our receiving side wants to know the ips user logged in under
|
120
|
-
# But doesn't want to sync the user_info
|
121
|
-
def attributes_for_sync
|
122
|
-
attributes.merge(
|
123
|
-
ips: user_info.ips
|
124
|
-
)
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
# to prevent the need to query for every piece of additional data we can use eager loading
|
129
|
-
# and construct published data by calling #attributes_for_sync
|
130
|
-
# don't forget to chunk it into more manageable sizes before trying to send
|
131
|
-
data = User.eager(:statuses).map { |user| user.attributes_for_sync }
|
132
|
-
```
|
133
|
-
This way it will not make unnecessary queries.
|
134
|
-
|
135
|
-
### `TableSync::Publishing::Batch`
|
136
|
-
|
137
|
-
Remember, it will query or initialize each row.
|
138
|
-
|
139
|
-
```ruby
|
140
|
-
# You can just send ids.
|
141
|
-
data = User.in_batches(of: 5000).naked.select(:id)
|
142
|
-
|
143
|
-
data.each_with_index do |data, i|
|
144
|
-
TableSync::Publishing::Batch.new(
|
145
|
-
object_class: "User",
|
146
|
-
original_attributes: data,
|
147
|
-
event: :create,
|
148
|
-
routing_key: :custom_key, # optional
|
149
|
-
headers: { type: "admin" }, # optional
|
150
|
-
).publish_now
|
151
|
-
|
152
|
-
sleep 1
|
153
|
-
puts "Batch #{i} sent!"
|
154
|
-
end
|
155
|
-
```
|