deimos-ruby 1.7.0.pre.beta1 → 1.8.0.pre.beta1

Files changed (49)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/Gemfile.lock +8 -2
  4. data/README.md +69 -15
  5. data/deimos-ruby.gemspec +2 -0
  6. data/docs/ARCHITECTURE.md +144 -0
  7. data/docs/CONFIGURATION.md +4 -0
  8. data/lib/deimos.rb +6 -6
  9. data/lib/deimos/active_record_consume/batch_consumption.rb +159 -0
  10. data/lib/deimos/active_record_consume/batch_slicer.rb +27 -0
  11. data/lib/deimos/active_record_consume/message_consumption.rb +58 -0
  12. data/lib/deimos/active_record_consume/schema_model_converter.rb +52 -0
  13. data/lib/deimos/active_record_consumer.rb +33 -75
  14. data/lib/deimos/batch_consumer.rb +2 -142
  15. data/lib/deimos/config/configuration.rb +8 -10
  16. data/lib/deimos/consume/batch_consumption.rb +148 -0
  17. data/lib/deimos/consume/message_consumption.rb +93 -0
  18. data/lib/deimos/consumer.rb +79 -72
  19. data/lib/deimos/kafka_message.rb +1 -1
  20. data/lib/deimos/message.rb +6 -1
  21. data/lib/deimos/utils/db_poller.rb +6 -6
  22. data/lib/deimos/utils/db_producer.rb +6 -2
  23. data/lib/deimos/utils/deadlock_retry.rb +68 -0
  24. data/lib/deimos/utils/lag_reporter.rb +19 -26
  25. data/lib/deimos/version.rb +1 -1
  26. data/spec/active_record_batch_consumer_spec.rb +481 -0
  27. data/spec/active_record_consume/batch_slicer_spec.rb +42 -0
  28. data/spec/active_record_consume/schema_model_converter_spec.rb +105 -0
  29. data/spec/active_record_consumer_spec.rb +3 -11
  30. data/spec/batch_consumer_spec.rb +23 -7
  31. data/spec/config/configuration_spec.rb +4 -0
  32. data/spec/consumer_spec.rb +6 -6
  33. data/spec/deimos_spec.rb +57 -49
  34. data/spec/handlers/my_batch_consumer.rb +6 -1
  35. data/spec/handlers/my_consumer.rb +6 -1
  36. data/spec/message_spec.rb +19 -0
  37. data/spec/schemas/com/my-namespace/MySchemaCompound-key.avsc +18 -0
  38. data/spec/schemas/com/my-namespace/Wibble.avsc +43 -0
  39. data/spec/spec_helper.rb +17 -0
  40. data/spec/utils/db_poller_spec.rb +2 -2
  41. data/spec/utils/deadlock_retry_spec.rb +74 -0
  42. data/spec/utils/lag_reporter_spec.rb +29 -22
  43. metadata +57 -16
  44. data/lib/deimos/base_consumer.rb +0 -100
  45. data/lib/deimos/utils/executor.rb +0 -124
  46. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  47. data/lib/deimos/utils/signal_handler.rb +0 -68
  48. data/spec/utils/executor_spec.rb +0 -53
  49. data/spec/utils/signal_handler_spec.rb +0 -16
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: ab8ac284db2c98dac5624caf5bf75118ad89fb9ec6e1f3109f15373f2bf4c8be
-   data.tar.gz: 7d26a7d8d163ab4783638c9393bea2d1a7c8f364a7eed2ea4cb699cbcbafd244
+   metadata.gz: 82c1b89b49622cb0d47a7a7aa56e076d908785906581f9f0772f09679acd6895
+   data.tar.gz: eeadf1d1a63db1407a81ef1f993be1b8d421e8e2624b9f42c324a2dad776bd1c
  SHA512:
-   metadata.gz: 2610223a8d8c2546dad4037e4d2b1845e77372b304f4c072c269316bceed4a6626bd36d3541561d91e1c68a06ed891c051feb16a1800763cf805061c05cadc58
-   data.tar.gz: 3916fa546b45182b987b0409d51bcce96c5a1beee449722861819f76377258e7ccb5700f50d2c0d8d1791986ed4f5d19b7c989cdcdbe77d1289e8531eb24ba15
+   metadata.gz: 6a85cd1d407c11d4becb6d6d11636623b96c706325f8bbca52caf4bf0cdce079df65432cb076d01503a4d4d977c8a662c22622e27193aeafc87ffbdc8d934045
+   data.tar.gz: '08f9b75f3057071f41effaf870f8d0927dca85d647726ba833371001c470a7a062ee74ac63b5acc2f284493d3a32373ffb6c3ca96f9428ffb762c83cfd804c42'
data/CHANGELOG.md CHANGED
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

  ## UNRELEASED

+ ## 1.8.0-beta1 - 2020-07-06
+ ### Features :star:
+ - Added `ActiveRecordConsumer` batch mode
+
+ ### Fixes :wrench:
+ - Fixed a bug where lag calculation could be incorrect if no messages were being consumed.
+ - Fixed a bug where printing messages on a MessageSizeTooLarge
+ error didn't work.
+
+ ### Roadmap
+ - Moved SignalHandler and Executor to the `sigurd` gem.
+
+ ## 1.7.0-beta1 - 2020-05-12
  ### Features :star:
  - Added the DB Poller feature / process.

data/Gemfile.lock CHANGED
@@ -5,6 +5,7 @@ PATH
    avro_turf (~> 0.11)
    phobos (~> 1.9)
    ruby-kafka (~> 0.7)
+   sigurd (= 0.0.1)

  GEM
    remote: https://rubygems.org/
@@ -65,6 +66,7 @@ GEM
    concurrent-ruby-ext (1.1.6)
      concurrent-ruby (= 1.1.6)
    crass (1.0.6)
+   database_cleaner (1.8.5)
    ddtrace (0.35.1)
      msgpack
    diff-lcs (1.3)
@@ -142,7 +144,7 @@ GEM
    pry (0.13.1)
      coderay (~> 1.1)
      method_source (~> 1.0)
-   rack (2.2.2)
+   rack (2.2.3)
    rack-test (1.1.0)
      rack (>= 1.0, < 3)
    rails (5.2.4.2)
@@ -204,6 +206,9 @@ GEM
      digest-crc
    ruby-progressbar (1.10.1)
    shellany (0.0.1)
+   sigurd (0.0.1)
+     concurrent-ruby (~> 1)
+     exponential-backoff
    sprockets (4.0.0)
      concurrent-ruby (~> 1.0)
      rack (> 1, < 3)
@@ -219,7 +224,7 @@ GEM
    unicode-display_width (1.7.0)
    websocket-driver (0.7.1)
      websocket-extensions (>= 0.1.0)
-   websocket-extensions (0.1.4)
+   websocket-extensions (0.1.5)

  PLATFORMS
    ruby
@@ -228,6 +233,7 @@ DEPENDENCIES
    activerecord (~> 5.2)
    activerecord-import
    avro (~> 1.9)
+   database_cleaner (~> 1.7)
    ddtrace (~> 0.11)
    deimos-ruby!
    dogstatsd-ruby (~> 4.2)
data/README.md CHANGED
@@ -314,24 +314,14 @@ messages as an array and then process them together. This can improve
  consumer throughput, depending on the use case. Batch consumers behave like
  other consumers in regards to key and payload decoding, etc.

- To enable batch consumption, ensure that the `delivery` property is set to `inline_batch`. For example:
+ To enable batch consumption, ensure that the `delivery` property of your
+ consumer is set to `inline_batch`.

- ```ruby
- Deimos.configure do
-   consumer do
-     class_name 'Consumers::MyBatchConsumer'
-     topic 'my_batched_topic'
-     group_id 'my_group_id'
-     delivery :inline_batch
-   end
- end
- ```
-
- Batch consumers must inherit from the Deimos::BatchConsumer class as in
- this sample:
+ Batch consumers will invoke the `consume_batch` method instead of `consume`,
+ as in this example:

  ```ruby
- class MyBatchConsumer < Deimos::BatchConsumer
+ class MyBatchConsumer < Deimos::Consumer

    def consume_batch(payloads, metadata)
      # payloads is an array of schema-decoded hashes.
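
For reference, a consumer configuration that enables this delivery mode follows the shape of the block removed above; a minimal sketch (the class, topic and group names are placeholders):

```ruby
Deimos.configure do
  consumer do
    class_name 'Consumers::MyBatchConsumer' # placeholder consumer class
    topic 'my_batched_topic'                # placeholder topic
    group_id 'my_group_id'                  # placeholder consumer group
    delivery :inline_batch                  # invokes consume_batch instead of consume
  end
end
```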
@@ -533,12 +523,14 @@ class MyConsumer < Deimos::ActiveRecordConsumer

    # Optional override of the way to fetch records based on payload and
    # key. Default is to use the key to search the primary key of the table.
+   # Only used in non-batch mode.
    def fetch_record(klass, payload, key)
      super
    end

    # Optional override on how to set primary key for new records.
    # Default is to set the class's primary key to the message's decoded key.
+   # Only used in non-batch mode.
    def assign_key(record, payload, key)
      super
    end
@@ -546,6 +538,7 @@ class MyConsumer < Deimos::ActiveRecordConsumer
    # Optional override of the default behavior, which is to call `destroy`
    # on the record - e.g. you can replace this with "archiving" the record
    # in some way.
+   # Only used in non-batch mode.
    def destroy_record(record)
      super
    end
@@ -555,6 +548,64 @@ class MyConsumer < Deimos::ActiveRecordConsumer
    def record_attributes(payload)
      super.merge(:some_field => 'some_value')
    end
+
+   # Optional override to change the attributes used for identifying records.
+   def record_key(payload)
+     super
+   end
+ end
+ ```
+
+ #### Batch Consumers
+
+ Deimos also provides a batch consumption mode for `ActiveRecordConsumer`, which
+ processes groups of messages at once using the ActiveRecord backend.
+
+ Batch ActiveRecord consumers make use of the
+ [activerecord-import](https://github.com/zdennis/activerecord-import) gem to
+ insert or update multiple records in bulk SQL statements. This reduces
+ processing time at the cost of skipping ActiveRecord callbacks for individual
+ records. Deleted records (tombstones) are grouped into `delete_all` calls and
+ thus also skip `destroy` callbacks.
+
+ Batch consumption is used when the `delivery` setting for your consumer is set to `inline_batch`.
+
+ **Note**: Currently, batch consumption supports only primary keys as identifiers out of the box. See
+ [the specs](spec/active_record_batch_consumer_spec.rb) for an example of how to use compound keys.
+
+ By default, batches will be compacted before processing, i.e. only the last
+ message for each unique key in a batch will actually be processed. To change
+ this behaviour, call `compacted false` inside of your consumer definition.
+
+ A sample batch consumer would look as follows:
+
+ ```ruby
+ class MyConsumer < Deimos::ActiveRecordConsumer
+   schema 'MySchema'
+   key_config field: 'my_field'
+   record_class Widget
+
+   # Controls whether the batch is compacted before consuming.
+   # If true, only the last message for each unique key in a batch will be
+   # processed.
+   # If false, messages will be grouped into "slices" of independent keys
+   # and each slice will be imported separately.
+   #
+   # compacted false
+
+   # Optional override of the default behavior, which is to call `delete_all`
+   # on the associated records - e.g. you can replace this with setting a deleted
+   # flag on the record.
+   def remove_records(records)
+     super
+   end
+
+   # Optional override to change the attributes of the record before they
+   # are saved.
+   def record_attributes(payload)
+     super.merge(:some_field => 'some_value')
+   end
  end
  ```
@@ -853,6 +904,9 @@ Deimos::Utils::InlineConsumer.get_messages_for(

  Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos .

+ We have more information on the [internal architecture](docs/ARCHITECTURE.md) of Deimos
+ for contributors!
+
  ### Linting

  Deimos uses Rubocop to lint the code. Please run Rubocop on your code
data/deimos-ruby.gemspec CHANGED
@@ -21,10 +21,12 @@ Gem::Specification.new do |spec|
    spec.add_runtime_dependency('avro_turf', '~> 0.11')
    spec.add_runtime_dependency('phobos', '~> 1.9')
    spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
+   spec.add_runtime_dependency('sigurd', '0.0.1')

    spec.add_development_dependency('activerecord', '~> 5.2')
    spec.add_development_dependency('activerecord-import')
    spec.add_development_dependency('avro', '~> 1.9')
+   spec.add_development_dependency('database_cleaner', '~> 1.7')
    spec.add_development_dependency('ddtrace', '~> 0.11')
    spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
    spec.add_development_dependency('guard', '~> 2')
data/docs/ARCHITECTURE.md ADDED
@@ -0,0 +1,144 @@
+ # Deimos Architecture
+
+ Deimos is the third of three libraries that add functionality on top of each
+ other:
+
+ * [RubyKafka](https://github.com/zendesk/ruby-kafka) is the low-level Kafka
+   client, providing APIs for producers, consumers and the client as a whole.
+ * [Phobos](https://github.com/phobos/phobos) is a lightweight wrapper on top
+   of RubyKafka that provides threaded consumers, a simpler way to write
+   producers, and lifecycle management.
+ * [Deimos](https://github.com/flipp-oss/deimos/) is a full-featured framework
+   using Phobos as its base, which provides schema integration (e.g. Avro),
+   database integration, metrics, tracing, test helpers and other utilities.
+
+ ## Folder structure
+
+ As of May 12, 2020, these are the important files for understanding how
+ Deimos fits together:
+ * `lib/generators`: Generators to generate database migrations, e.g.
+   for the DB Poller and DB Producer features.
+ * `lib/tasks`: Rake tasks for starting consumers, DB Pollers, etc.
+ * `lib/deimos`: Main Deimos code.
+ * `lib/deimos/deimos.rb`: The bootstrap / startup code for Deimos. Also provides
+   some global convenience methods and (for legacy purposes) the way to
+   start the DB Producer.
+ * `lib/deimos/backends`: The different plug-in producer backends - e.g. produce
+   directly to Kafka, use the DB backend, etc.
+ * `lib/deimos/schema_backends`: The different plug-in schema handlers, such
+   as the various flavors of Avro (with/without schema registry, etc.).
+ * `lib/deimos/metrics`: The different plug-in metrics providers, e.g. Datadog.
+ * `lib/deimos/tracing`: The different plug-in tracing providers, e.g. Datadog.
+ * `lib/deimos/utils`: Utility classes for things not directly related to
+   producing and consuming, such as the DB Poller, DB Producer, lag reporter, etc.
+ * `lib/deimos/config`: Classes related to configuring Deimos.
+ * `lib/deimos/monkey_patches`: Monkey patches to existing libraries. These
+   should be removed in a future update.
+
+ ## Features
+
+ ### Producers and Consumers
+
+ Both producers and consumers include the `SharedConfig` module, which
+ standardizes configuration like schema settings, topic, keys, etc.
+
+ Consumers come in two flavors: `Consumer` and `BatchConsumer`. Both include
+ `BaseConsumer` for shared functionality.
+
+ While produced messages go to Kafka by default, literally anything else
+ can happen when your producer calls `produce`, by swapping out the producer
+ _backend_. This is just a class that needs to inherit from `Deimos::Backends::Base`
+ and implement a single method, `execute`.
+
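
To make the backend contract above concrete, here is a hedged sketch of a custom backend; the keyword arguments of `execute` (`producer_class:`, `messages:`) are an assumption based on how the built-in backends are invoked, and `LoggingBackend` is a hypothetical name:

```ruby
# Hypothetical backend that logs messages instead of publishing them.
class LoggingBackend < Deimos::Backends::Base
  class << self
    # Assumed signature: the producer class and the processed messages.
    def execute(producer_class:, messages:)
      messages.each do |message|
        Deimos.config.logger.info(
          "Would publish to #{producer_class.topic}: #{message.payload}"
        )
      end
    end
  end
end
```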
+ Producers have a complex workflow while processing the payload to publish. This
+ is aided by the `Deimos::Message` class (not to be confused with the
+ `KafkaMessage` class, which is an ActiveRecord model used by the DB Producer
+ feature, below).
+
+ ### Schemas
+
+ Schema backends are used to encode and decode payloads into different formats
+ such as Avro. These are integrated with producers and consumers, as well
+ as test helpers. They are a bit more involved than producer backends, and
+ must define methods such as:
+ * `encode` a payload or key (when encoding a key, for Avro a key schema
+   may be auto-generated)
+ * `decode` a payload or key
+ * `validate` that a payload is correct for encoding
+ * `coerce` a payload into the given schema (e.g. turn ints into strings)
+ * Get a list of `schema_fields` in the configured schema, used when interacting
+   with ActiveRecord
+ * Define a `mock` backend to use when the given backend is used. This is used
+   during testing. Typically mock backends will validate values but not
+   actually encode/decode them.
+
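
A hedged usage sketch of this interface; the `Deimos.schema_backend` lookup helper and the exact call signatures are assumptions, and `MyTopicSchema` with its fields is a placeholder:

```ruby
# Assumed helper returning the configured backend for a schema/namespace pair.
backend = Deimos.schema_backend(schema: 'MyTopicSchema', namespace: 'my.namespace')

payload = { 'test_id' => 'abc', 'some_int' => 3 }  # hypothetical fields
backend.validate(payload, schema: 'MyTopicSchema') # raises if the payload doesn't fit
coerced = backend.coerce(payload) # e.g. turns ints into strings where the schema says so
encoded = backend.encode(coerced) # schema-encoded bytes
decoded = backend.decode(encoded) # back to a hash
backend.schema_fields             # field list, used by the ActiveRecord integration
```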
+ ### Configuration
+
+ Deimos has its own `Configurable` module that makes heavy use of `method_missing`
+ to provide a very succinct but powerful configuration format (including
+ default values, procs, printing out as a hash, resetting, etc.). It also
+ allows for multiple blocks to define different objects of the same type
+ (like producers, consumers, pollers, etc.).
+
+ The configuration definition for Deimos is in `config/configuration.rb`. In
+ addition, there are methods in `config/phobos_config.rb` which translate to/from
+ the Phobos configuration format and support the old `phobos.yml` method
+ of configuration.
+
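
For illustration, the multiple-block style looks like this (drawn from the consumer examples elsewhere in this changeset; class, topic and group names are placeholders):

```ruby
Deimos.configure do
  # Two blocks of the same type, each defining its own consumer object.
  consumer do
    class_name 'Consumers::MyConsumer'
    topic 'my_topic'
    group_id 'my_group_id'
  end

  consumer do
    class_name 'Consumers::MyBatchConsumer'
    topic 'my_batched_topic'
    group_id 'my_batch_group_id'
    delivery :inline_batch
  end
end
```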
+ ### Metrics and Tracing
+
+ These are simpler than the other plugins and must implement the expected methods
+ (`increment`, `gauge`, `histogram` and `time` for metrics, and `start`, `finish`
+ and `set_error` for tracing). They are used primarily in producers and consumers.
+
+ ### ActiveRecord Integration
+
+ Deimos provides an `ActiveRecordConsumer` and `ActiveRecordProducer`. These are
+ relatively lightweight ways to save data into, or read it out of, a database
+ as part of app logic. They use things like the `coerce` method of the schema
+ backends to manage the differences between the given payload and the
+ configured schema for the topic.
+
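
The consumer side is documented at length in the README diff above; for the producer side, a hedged sketch (the `Widget` model and the `send_events` entry point are assumptions for illustration):

```ruby
# Hypothetical producer publishing Widget records with the topic's schema.
class WidgetProducer < Deimos::ActiveRecordProducer
  schema 'MySchema'
  namespace 'com.my-namespace'
  topic 'widgets'         # placeholder topic
  key_config field: 'id'
  record_class Widget     # placeholder ActiveRecord model
end

# Assumed usage: map each record through the schema (coercing types as
# needed) and publish one message per record.
WidgetProducer.send_events([Widget.first])
```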
+ ### Database Backend / Database Producer
+
+ This feature (which provides better performance and transaction guarantees)
+ is powered by two components:
+ * The `db` _publish backend_, which saves messages to the database rather
+   than to Kafka;
+ * The `DbProducer` utility, which runs as a separate process and pulls data
+   from the database to send to Kafka.
+
+ A set of utility classes, largely copied from Phobos, power the producer
+ (see the sketch after this list):
+ * `Executor` takes a set of "runnable" things (which implement a `start` and
+   `stop` method), puts them in a thread pool, and runs them all concurrently.
+   It manages starting and stopping all threads when necessary.
+ * `SignalHandler` wraps the Executor and handles SIGINT and SIGTERM signals
+   to stop the executor gracefully.
+
+ In the case of this feature, the `DbProducer` is the runnable object - it
+ can run several threads at once.
+
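
This mirrors the startup code visible in the `lib/deimos.rb` diff below (as of this release, `Executor` and `SignalHandler` live in the `sigurd` gem):

```ruby
# Each DbProducer implements start/stop, making it a "runnable".
producers = (1..2).map do
  Deimos::Utils::DbProducer.new(Deimos.config.logger)
end

# The executor runs all runnables concurrently in a thread pool.
executor = Sigurd::Executor.new(producers,
                                sleep_seconds: 5,
                                logger: Deimos.config.logger)

# The signal handler traps SIGINT/SIGTERM and stops the executor gracefully.
Sigurd::SignalHandler.new(executor).run!
```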
+ On the database side, the ActiveRecord models that power this feature are:
+ * `KafkaMessage`: The actual message, saved to the database. This message
+   is already encoded by the producer, so it only has to be sent.
+ * `KafkaTopicInfo`: Used for locking topics so that only one producer works
+   on a topic at a time.
+
+ A Rake task (defined in `deimos.rake`) can be used to start the producer.
+
+ ### Database Poller
+
+ This feature (which periodically polls the database to send Kafka messages)
+ primarily uses other aspects of Deimos and hence is relatively small in size.
+ The `DbPoller` class acts as a "runnable" and is used by an Executor (above).
+ The `PollInfo` class is saved to the database to track how far along each
+ poller is.
+
+ A Rake task (defined in `deimos.rake`) can be used to start the pollers.
+
+ ### Other Utilities
+
+ The `utils` folder also contains the `LagReporter` (which sends metrics on
+ lag) and the `InlineConsumer`, which can read data from a topic and directly
+ pass it into a handler or save it to memory.
data/docs/CONFIGURATION.md CHANGED
@@ -58,6 +58,10 @@ Deimos.configure do
    namespace 'my.namespace'
    key_config field: :id

+   # Setting this to :inline_batch will invoke consume_batch instead of consume
+   # for each batch of messages.
+   delivery :batch
+
    # If config.schema.path is app/schemas, assumes there is a file in
    # app/schemas/my/namespace/MyTopicSchema.avsc
  end
data/lib/deimos.rb CHANGED
@@ -28,8 +28,8 @@ if defined?(ActiveRecord)
    require 'deimos/kafka_source'
    require 'deimos/kafka_topic_info'
    require 'deimos/backends/db'
-   require 'deimos/utils/signal_handler.rb'
-   require 'deimos/utils/executor.rb'
+   require 'sigurd/signal_handler.rb'
+   require 'sigurd/executor.rb'
    require 'deimos/utils/db_producer.rb'
    require 'deimos/utils/db_poller'
  end
@@ -72,10 +72,10 @@ module Deimos
        Deimos::Utils::DbProducer.
          new(self.config.db_producer.logger || self.config.logger)
      end
-     executor = Deimos::Utils::Executor.new(producers,
-                                            sleep_seconds: 5,
-                                            logger: self.config.logger)
-     signal_handler = Deimos::Utils::SignalHandler.new(executor)
+     executor = Sigurd::Executor.new(producers,
+                                     sleep_seconds: 5,
+                                     logger: self.config.logger)
+     signal_handler = Sigurd::SignalHandler.new(executor)
      signal_handler.run!
    end
  end
data/lib/deimos/active_record_consume/batch_consumption.rb ADDED
@@ -0,0 +1,159 @@
+ # frozen_string_literal: true
+
+ require 'deimos/active_record_consume/batch_slicer'
+ require 'deimos/utils/deadlock_retry'
+ require 'deimos/message'
+
+ module Deimos
+   module ActiveRecordConsume
+     # Methods for consuming batches of messages and saving them to the database
+     # in bulk ActiveRecord operations.
+     module BatchConsumption
+       # Handle a batch of Kafka messages. Batches are split into "slices",
+       # which are groups of independent messages that can be processed together
+       # in a single database operation.
+       # If two messages in a batch have the same key, we cannot process them
+       # in the same operation, as they would interfere with each other. Thus
+       # they are split into separate slices.
+       # @param payloads [Array<Hash>] Decoded payloads.
+       # @param metadata [Hash] Information about the batch, including keys.
+       def consume_batch(payloads, metadata)
+         messages = payloads.
+           zip(metadata[:keys]).
+           map { |p, k| Deimos::Message.new(p, nil, key: k) }
+
+         tags = %W(topic:#{metadata[:topic]})
+
+         Deimos.instrument('ar_consumer.consume_batch', tags) do
+           # The entire batch should be treated as one transaction so that if
+           # any message fails, the whole thing is rolled back or retried
+           # if there is a deadlock.
+           Deimos::Utils::DeadlockRetry.wrap(tags) do
+             if @compacted || self.class.config[:no_keys]
+               update_database(compact_messages(messages))
+             else
+               uncompacted_update(messages)
+             end
+           end
+         end
+       end
+
+       # Get the unique key for the ActiveRecord instance from the incoming key.
+       # Override this method (with super) to customize the set of attributes that
+       # uniquely identifies each record in the database.
+       # @param key [String] The encoded key.
+       # @return [Hash] The key attributes.
+       def record_key(key)
+         decoded_key = decode_key(key)
+
+         if decoded_key.nil?
+           {}
+         elsif decoded_key.is_a?(Hash)
+           @key_converter.convert(decoded_key)
+         else
+           { @klass.primary_key => decoded_key }
+         end
+       end
+
+       protected
+
+       # Perform database operations for a batch of messages without compaction.
+       # All messages are split into slices containing only unique keys, and
+       # each slice is handled as its own batch.
+       # @param messages [Array<Message>] List of messages.
+       def uncompacted_update(messages)
+         BatchSlicer.
+           slice(messages).
+           each(&method(:update_database))
+       end
+
+       # Perform database operations for a group of messages.
+       # All messages with payloads are passed to upsert_records.
+       # All tombstone messages are passed to remove_records.
+       # @param messages [Array<Message>] List of messages.
+       def update_database(messages)
+         # Find all upserted records (i.e. those that have a payload) and all
+         # deleted records (no payload).
+         removed, upserted = messages.partition(&:tombstone?)
+
+         upsert_records(upserted) if upserted.any?
+         remove_records(removed) if removed.any?
+       end
+
+       # Upsert any non-deleted records.
+       # @param messages [Array<Message>] List of messages for a group of
+       # records to either be updated or inserted.
+       def upsert_records(messages)
+         key_cols = key_columns(messages)
+
+         # Create payloads with payload + key attributes
+         upserts = messages.map do |m|
+           record_attributes(m.payload, m.key)&.
+             merge(record_key(m.key))
+         end
+
+         # If an overridden record_attributes indicated no record, skip it
+         upserts.compact!
+
+         options = if key_cols.empty?
+                     {} # Can't upsert with no key, just do a regular insert
+                   else
+                     {
+                       on_duplicate_key_update: {
+                         # conflict_target must explicitly list the columns for
+                         # Postgres and SQLite. Not required for MySQL, but this
+                         # ensures consistent behaviour.
+                         conflict_target: key_cols,
+                         columns: :all
+                       }
+                     }
+                   end
+
+         @klass.import!(upserts, options)
+       end
+
+       # Delete any records with a tombstone.
+       # @param messages [Array<Message>] List of messages for a group of
+       # deleted records.
+       def remove_records(messages)
+         clause = deleted_query(messages)
+
+         clause.delete_all
+       end
+
+       # Create an ActiveRecord relation that matches all of the passed
+       # records. Used for bulk deletion.
+       # @param records [Array<Message>] List of messages.
+       # @return [ActiveRecord::Relation] Matching relation.
+       def deleted_query(records)
+         keys = records.
+           map { |m| record_key(m.key)[@klass.primary_key] }.
+           reject(&:nil?)
+
+         @klass.unscoped.where(@klass.primary_key => keys)
+       end
+
+       # Get the set of attribute names that uniquely identify messages in the
+       # batch. Requires at least one record.
+       # @param records [Array<Message>] Non-empty list of messages.
+       # @return [Array<String>] List of attribute names.
+       # @raise If records is empty.
+       def key_columns(records)
+         raise 'Cannot determine key from empty batch' if records.empty?
+
+         first_key = records.first.key
+         record_key(first_key).keys
+       end
+
+       # Compact a batch of messages, taking only the last message for each
+       # unique key.
+       # @param batch [Array<Message>] Batch of messages.
+       # @return [Array<Message>] Compacted batch.
+       def compact_messages(batch)
+         return batch unless batch.first&.key.present?
+
+         batch.reverse.uniq(&:key).reverse!
+       end
+     end
+   end
+ end
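
To make the slicing behaviour used by `uncompacted_update` concrete, here is an illustrative sketch, not the gem's `BatchSlicer` implementation: each slice holds at most one message per key, and a message lands in the slice whose index equals the number of earlier messages with the same key, so per-key ordering is preserved when slices are processed in order.

```ruby
# Illustrative stand-in for BatchSlicer.slice (simplified).
def slice_batch(messages)
  seen = Hash.new(0) # occurrences of each key so far
  slices = []
  messages.each do |message|
    index = seen[message.key]
    seen[message.key] += 1
    (slices[index] ||= []) << message
  end
  slices
end

# With keys [1, 2, 1, 3], the batch splits into two slices:
# [[msg(1), msg(2), msg(3)], [msg(1)]] - the second msg(1) waits for slice 2.
```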