deimos-ruby 1.0.0.pre.beta22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +32 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +752 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos/active_record_consumer.rb +81 -0
  24. data/lib/deimos/active_record_producer.rb +64 -0
  25. data/lib/deimos/avro_data_coder.rb +89 -0
  26. data/lib/deimos/avro_data_decoder.rb +36 -0
  27. data/lib/deimos/avro_data_encoder.rb +51 -0
  28. data/lib/deimos/backends/db.rb +27 -0
  29. data/lib/deimos/backends/kafka.rb +27 -0
  30. data/lib/deimos/backends/kafka_async.rb +27 -0
  31. data/lib/deimos/configuration.rb +90 -0
  32. data/lib/deimos/consumer.rb +164 -0
  33. data/lib/deimos/instrumentation.rb +71 -0
  34. data/lib/deimos/kafka_message.rb +27 -0
  35. data/lib/deimos/kafka_source.rb +126 -0
  36. data/lib/deimos/kafka_topic_info.rb +86 -0
  37. data/lib/deimos/message.rb +74 -0
  38. data/lib/deimos/metrics/datadog.rb +47 -0
  39. data/lib/deimos/metrics/mock.rb +39 -0
  40. data/lib/deimos/metrics/provider.rb +38 -0
  41. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  42. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  43. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  44. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  45. data/lib/deimos/producer.rb +218 -0
  46. data/lib/deimos/publish_backend.rb +30 -0
  47. data/lib/deimos/railtie.rb +8 -0
  48. data/lib/deimos/schema_coercer.rb +108 -0
  49. data/lib/deimos/shared_config.rb +59 -0
  50. data/lib/deimos/test_helpers.rb +356 -0
  51. data/lib/deimos/tracing/datadog.rb +35 -0
  52. data/lib/deimos/tracing/mock.rb +40 -0
  53. data/lib/deimos/tracing/provider.rb +31 -0
  54. data/lib/deimos/utils/db_producer.rb +122 -0
  55. data/lib/deimos/utils/executor.rb +117 -0
  56. data/lib/deimos/utils/inline_consumer.rb +144 -0
  57. data/lib/deimos/utils/lag_reporter.rb +182 -0
  58. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  59. data/lib/deimos/utils/signal_handler.rb +68 -0
  60. data/lib/deimos/version.rb +5 -0
  61. data/lib/deimos.rb +133 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +27 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +120 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +259 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/README.md ADDED

<p align="center">
  <img src="support/deimos-with-name.png" title="Deimos logo"/>
  <br/>
  <img src="https://img.shields.io/circleci/build/github/flipp-oss/deimos.svg" alt="CircleCI"/>
  <a href="https://badge.fury.io/rb/deimos"><img src="https://badge.fury.io/rb/deimos.svg" alt="Gem Version" height="18"></a>
  <img src="https://img.shields.io/codeclimate/maintainability/flipp-oss/deimos.svg"/>
</p>

A Ruby framework for marrying Kafka, Avro, and/or ActiveRecord, providing
a useful toolbox of goodies for Ruby-based Kafka development.
Built on Phobos and hence Ruby-Kafka.

<!--ts-->
   * [Installation](#installation)
   * [Versioning](#versioning)
   * [Configuration](#configuration)
   * [Producers](#producers)
      * [Auto-added Fields](#auto-added-fields)
      * [Coerced Values](#coerced-values)
      * [Instrumentation](#instrumentation)
      * [Kafka Message Keys](#kafka-message-keys)
   * [Consumers](#consumers)
   * [Rails Integration](#rails-integration)
   * [Running Consumers](#running-consumers)
   * [Metrics](#metrics)
   * [Tracing](#tracing)
   * [Testing](#testing)
      * [Integration Test Helpers](#integration-test-helpers)
   * [Contributing](#contributing)
<!--te-->

# Installation

Add this line to your application's Gemfile:
```ruby
gem 'deimos-ruby'
```

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install deimos-ruby

# Versioning

We use a version of semver for this gem. Any change in previous behavior
(something works differently or something old no longer works)
is denoted with a bump in the minor version (0.4 -> 0.5). Patch versions
are for bugfixes or new functionality which does not affect existing code. You
should be locking your Gemfile to the minor version:

```ruby
gem 'deimos-ruby', '~> 1.1'
```

# Configuration

To configure the gem, use `configure` in an initializer:

```ruby
Deimos.configure do |config|
  # Configure logger
  config.logger = Rails.logger

  # Phobos settings
  config.phobos_config_file = 'config/phobos.yml'
  config.schema_registry_url = 'https://my-schema-registry.com'
  config.seed_broker = 'my.seed.broker.0.net:9093,my.seed.broker.1.net:9093'
  config.ssl_enabled = ENV['KAFKA_SSL_ENABLED']
  if config.ssl_enabled
    config.ssl_ca_cert = File.read(ENV['SSL_CA_CERT'])
    config.ssl_client_cert = File.read(ENV['SSL_CLIENT_CERT'])
    config.ssl_client_cert_key = File.read(ENV['SSL_CLIENT_CERT_KEY'])
  end

  # Other settings

  # Local path to find schemas, for publishing and testing consumers
  config.schema_path = "#{Rails.root}/app/schemas"

  # Default namespace for producers to use
  config.producer_schema_namespace = 'com.deimos.my_app'

  # Prefix for all topics, e.g. environment name
  config.producer_topic_prefix = 'myenv.'

  # Disable all producers - e.g. when doing heavy data lifting and events
  # would be fired a different way
  config.disable_producers = true

  # Default behavior is to swallow uncaught exceptions and log to DataDog.
  # Set this to true to instead raise all errors. Note that raising an error
  # will ensure that the message cannot be processed - if there is a bad
  # message which will always raise that error, your consumer will not
  # be able to proceed past it and will be stuck forever until you fix
  # your code.
  config.reraise_consumer_errors = true

  # Set to true to send consumer lag metrics
  config.report_lag = %w(production staging).include?(Rails.env)

  # Change the default backend. See Database Backend, below.
  config.publish_backend = :db

  # If the DB backend is being used, specify the number of threads to create
  # to process the DB messages.
  config.num_producer_threads = 1

  # Configure the metrics provider (see below).
  config.metrics = Deimos::Metrics::Mock.new({ tags: %w(env:prod my_tag:another_1) })

  # Configure the tracing provider (see below).
  config.tracer = Deimos::Tracing::Mock.new({service_name: 'my-service'})
end
```

Note that the configuration options from Phobos (seed_broker and the SSL settings)
can be removed from `phobos.yml` since Deimos will load them instead.

# Producers

Producers will look like this:

```ruby
class MyProducer < Deimos::Producer

  # Can override default namespace.
  namespace 'com.deimos.my-app-special'
  topic 'MyApp.MyTopic'
  schema 'MySchema'
  key_config field: 'my_field' # see Kafka Message Keys, below

  # If config.schema_path is app/schemas, assumes there is a file in
  # app/schemas/com/deimos/my-app-special/MySchema.avsc

  class << self

    # Optionally override the default partition key logic, which is to use
    # the payload key if it's provided, and nil if there is no payload key.
    def partition_key(payload)
      payload[:my_id]
    end

    # You can call publish / publish_list directly, or create new methods
    # wrapping them.

    def send_some_message(an_object)
      payload = {
        'some-key' => an_object.foo,
        'some-key2' => an_object.bar
      }
      # You can also publish an array with self.publish_list(payloads)
      self.publish(payload)
    end

  end

end
```
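
Calling the producer is then a one-liner. A minimal usage sketch (`some_object` here stands in for any object responding to `foo` and `bar`, as in the example above):

```ruby
# Publish a single payload via the wrapper method defined above:
MyProducer.send_some_message(some_object)

# Or publish several payloads in one batch:
MyProducer.publish_list(
  [
    { 'some-key' => 'a', 'some-key2' => 'b' },
    { 'some-key' => 'c', 'some-key2' => 'd' }
  ]
)
```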

### Auto-added Fields

If your schema has a field called `message_id`, and the payload you give
your producer doesn't have this set, Deimos will auto-generate
a message ID. It is highly recommended to give all schemas a `message_id`
so that you can track each sent message via logging.

You can also provide a field in your schema called `timestamp` which will be
auto-filled with the current timestamp if not provided.
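
A quick sketch of the behavior, assuming the `MySchema` from above also declared `message_id` and `timestamp` fields:

```ruby
MyProducer.publish('test_id' => '123', 'some_int' => 5)
# => encoded with an auto-generated message_id and the current timestamp,
#    since neither was set in the payload above. Values you set yourself
#    are kept as-is.
```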

### Coerced Values

Deimos will do some simple coercions if you pass values that don't
exactly match the schema.

* If the schema is :int or :long, any integer value, or a string representing
  an integer, will be parsed to Integer.
* If the schema is :float or :double, any numeric value, or a string
  representing a number, will be parsed to Float.
* If the schema is :string, if the value implements its own `to_s` method,
  this will be called on it. This includes hashes, symbols, numbers, dates, etc.
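
For example, with the hypothetical schema above (a string `test_id` and an int `some_int`), these two calls produce identical encoded messages:

```ruby
# Both payloads coerce to { test_id: "123", some_int: 456 } before encoding:
MyProducer.publish('test_id' => 123, 'some_int' => '456')
MyProducer.publish('test_id' => '123', 'some_int' => 456)
```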

### Instrumentation

Deimos will send ActiveSupport Notifications.
You can listen to these notifications e.g. as follows:

```ruby
Deimos.subscribe('produce') do |event|
  # event is an ActiveSupport::Notifications::Event
  # you can access time, duration, and transaction_id
  # payload contains :producer, :topic, and :payloads
  data = event.payload
end
```

The following events are also produced:

* `produce_error` - sent when an error occurs when producing a message.
  * producer - the class that produced the message
  * topic
  * exception_object
  * payloads - the unencoded payloads
* `encode_messages` - sent when messages are being Avro-encoded.
  * producer - the class that produced the message
  * topic
  * payloads - the unencoded payloads

Similarly:
```ruby
Deimos.subscribe('produce_error') do |event|
  data = event.payload
  Mail.send("Got an error #{data[:exception_object].message} on topic #{data[:topic]} with payloads #{data[:payloads]}")
end

Deimos.subscribe('encode_messages') do |event|
  # ...
end
```

### Kafka Message Keys

Topics representing events rather than domain data don't need keys. However,
best practice for domain messages is to Avro-encode message keys
with a separate Avro schema.

This is enforced by requiring producers to define a `key_config` directive. If
any message comes in with a key, the producer will error out if `key_config` is
not defined.

There are four possible configurations to use:

* `key_config none: true` - this indicates that you are not using keys at all
  for this topic. This *must* be set if your messages won't have keys - either
  all your messages in a topic need to have a key, or they all need to have
  no key. This is a good choice for events that aren't keyed - you can still
  set a partition key.
* `key_config plain: true` - this indicates that you are not using an Avro-encoded
  key. Use this for legacy topics - new topics should not use this setting.
* `key_config schema: 'MyKeySchema-key'` - this tells the producer to look for
  an existing key schema named `MyKeySchema-key` in the schema registry and to
  encode the key using it. Use this if you've already created a key schema
  or the key value does not exist in the existing payload
  (e.g. it is a compound or generated key).
* `key_config field: 'my_field'` - this tells the producer to look for a field
  named `my_field` in the value schema. When a payload comes in, the producer
  will take that value from the payload and insert it in a *dynamically generated*
  key schema. This key schema does not need to live in your codebase. Instead,
  it will be a subset of the value schema with only the key field in it.

If your value schema looks like this:
```javascript
{
  "namespace": "com.my-namespace",
  "name": "MySchema",
  "type": "record",
  "doc": "Test schema",
  "fields": [
    {
      "name": "test_id",
      "type": "string",
      "doc": "test string"
    },
    {
      "name": "some_int",
      "type": "int",
      "doc": "test int"
    }
  ]
}
```

...setting `key_config field: 'test_id'` will create a key schema that looks
like this:

```javascript
{
  "namespace": "com.my-namespace",
  "name": "MySchema-key",
  "type": "record",
  "doc": "Key for com.my-namespace.MySchema",
  "fields": [
    {
      "name": "test_id",
      "type": "string",
      "doc": "test string"
    }
  ]
}
```

If you publish a payload `{ "test_id" => "123", "some_int" => 123 }`, this
will be turned into a key that looks like `{ "test_id" => "123" }` and encoded
via Avro before being sent to Kafka.

If you are using `plain` or `schema` as your config, you will need to add a
special `payload_key` key to your payload hash. This will be extracted and
used as the key (for `plain`, it will be used directly, while for `schema`
it will be encoded first against the schema). So your payload would look like
`{ "test_id" => "123", "some_int" => 123, payload_key: "some_other_key" }`.
Remember that if you're using `schema`, the `payload_key` must be a *hash*,
not a plain value.
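
A sketch of the `schema` case (the key schema name and its `compound_id` field are illustrative):

```ruby
class MyKeyedProducer < Deimos::Producer
  topic 'MyApp.MyTopic'
  schema 'MySchema'
  # Assumes a MyKeySchema-key schema already exists in the registry:
  key_config schema: 'MyKeySchema-key'
end

# With `schema`, payload_key must be a hash matching the key schema:
MyKeyedProducer.publish(
  'test_id' => '123',
  'some_int' => 123,
  payload_key: { 'compound_id' => 'region-1/123' }
)
```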

# Consumers

Here is a sample consumer:

```ruby
class MyConsumer < Deimos::Consumer

  # These are optional but strongly recommended for testing purposes; this
  # will validate against a local schema file used as the reader schema,
  # as well as being able to write tests against this schema.
  # This is recommended since it ensures you are always getting the values
  # you expect.
  schema 'MySchema'
  namespace 'com.my-namespace'
  # This directive works identically to the producer - see Kafka Message Keys,
  # above. This only affects the `decode_key` method below. You need to provide
  # `schema` and `namespace`, above, for this to work.
  key_config field: :my_id

  def consume(payload, metadata)
    # Same method as Phobos consumers.
    # payload is an Avro-decoded hash.
    # Metadata is a hash that contains information like :key and :topic. Both
    # key (if configured) and payload will be Avro-decoded.
  end
end
```


# Rails Integration

### Producing

Deimos comes with an ActiveRecordProducer. This takes a single ActiveRecord
object or hash, or a list of them, and maps them to the given schema.

An example would look like this:

```ruby
class MyProducer < Deimos::ActiveRecordProducer

  topic 'MyApp.MyTopic'
  schema 'MySchema'
  key_config field: 'my_field'

  # The record class should be set on every ActiveRecordProducer.
  # By default, if you give the producer a hash, it will re-fetch the
  # record itself for use in the payload generation. This can be useful
  # if you pass a list of hashes to the method e.g. as part of a
  # mass import operation. You can turn off this behavior (e.g. if you're just
  # using the default functionality and don't need to override it)
  # by setting `refetch` to false. This will avoid extra database fetches.
  record_class Widget, refetch: false

  # Optionally override this if you want the message to be
  # sent even if fields that aren't in the schema are changed.
  def watched_attributes
    super + ['a_non_schema_attribute']
  end

  # If you want to just use the default functionality you can leave this
  # method out entirely. You only need to use it if you want to massage
  # the payload in some way, e.g. adding fields that don't exist on the
  # record itself.
  def generate_payload(attributes, record)
    super # generates payload based on the record and schema
  end

end

# Use `send_events` with a list, or `send_event` with just one Widget:
MyProducer.send_events([Widget.new(foo: 1), Widget.new(foo: 2)])
MyProducer.send_events([{foo: 1}, {foo: 2}])
```

#### Disabling Producers

You can disable producers globally or inside a block. Globally:
```ruby
Deimos.config.disable_producers = true
```

For the duration of a block:
```ruby
Deimos.disable_producers do
  # code goes here
end
```

For specific producers only:
```ruby
Deimos.disable_producers(Producer1, Producer2) do
  # code goes here
end
```

#### KafkaSource

There is a special mixin which can be added to any ActiveRecord class. This
will create callbacks which will automatically send messages to Kafka whenever
this class is saved. This even includes using the [activerecord-import](https://github.com/zdennis/activerecord-import) gem
to import objects (including using `on_duplicate_key_update`). However,
it will *not* work for `update_all`, `delete` or `delete_all`, and naturally
will not fire if using pure SQL or Arel.

Note that these messages are sent *during the transaction*, i.e. using
`after_create`, `after_update` and `after_destroy`. If there are
questions of consistency between the database and Kafka, it is recommended
to switch to using the DB backend (see next section) to avoid these issues.

When the object is destroyed, an empty payload with a payload key consisting of
the record's primary key is sent to the producer. If your topic's key is
from another field, you will need to override the `deletion_payload` method.

```ruby
class Widget < ActiveRecord::Base
  include Deimos::KafkaSource

  # Class method that defines the ActiveRecordProducer(s) that will take
  # the object and turn it into a payload.
  def self.kafka_producers
    [MyProducer]
  end

  def deletion_payload
    { payload_key: self.uuid }
  end

  # Optional - indicate that you want to send messages when these events
  # occur.
  def self.kafka_config
    {
      :update => true,
      :delete => true,
      :import => true,
      :create => true
    }
  end

end
```
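
With the mixin in place, ordinary ActiveRecord calls publish for you. A sketch (assumes a `name` column on `Widget`):

```ruby
widget = Widget.create!(name: 'hi')     # publishes via the create callback
widget.update!(name: 'bye')             # publishes via the update callback
widget.destroy                          # publishes the deletion_payload
Widget.import([Widget.new(name: 'a')])  # publishes via activerecord-import
```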

#### Database Backend

Deimos provides a way to allow Kafka messages to be created inside a
database transaction and sent asynchronously. This ensures that your
database transactions and Kafka messages related to those transactions
are always in sync. Essentially, it separates the message logic so that a
message is first validated, encoded, and saved in the database, and then sent
on a separate thread. This means if you have to roll back your transaction,
it also rolls back your Kafka messages.

This is also known as the [Transactional Outbox pattern](https://microservices.io/patterns/data/transactional-outbox.html).

To enable this, first generate the migration to create the relevant tables:

    rails g deimos:db_backend

You can now set the following configuration:

    config.publish_backend = :db

This will save all your Kafka messages to the `kafka_messages` table instead
of immediately sending to Kafka. Now, you just need to call

    Deimos.start_db_backend!

If using Rails, you can use a Rake task to do this:

    rails deimos:db_producer

This creates one or more threads dedicated to scanning and publishing these
messages by using the `kafka_topics` table in a manner similar to
[Delayed Job](https://github.com/collectiveidea/delayed_job).
You can pass in a number of threads to the method:

    Deimos.start_db_backend!(thread_count: 2) # OR
    THREAD_COUNT=5 rails deimos:db_producer

If you want to force a message to send immediately, just call the `publish_list`
method with `force_send: true`. You can also pass `force_send` into any of the
other methods that publish events, like `send_event` in `ActiveRecordProducer`.
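
A sketch of what this buys you (`widget` and `MyProducer` are the illustrative names from above):

```ruby
ActiveRecord::Base.transaction do
  widget.save!
  MyProducer.send_event(widget) # row written to kafka_messages, not Kafka
  raise ActiveRecord::Rollback  # rolls back the widget AND its message
end

# Bypass the kafka_messages table and send right away:
MyProducer.publish_list([payload], force_send: true)
```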

For more information on how the database backend works and why it was
implemented, please see [Database Backends](docs/DATABASE_BACKEND.md).

### Consuming

Deimos provides an ActiveRecordConsumer which will take a payload
and automatically save it to a provided model. It will take the intersection
of the payload fields and the model attributes, and either create a new record
or update an existing record. It will use the message key to find the record
in the database.

To delete a record, simply produce a message with the record's ID as the message
key and a null payload.

Note that to retrieve the key, you must specify the correct [key encoding](#kafka-message-keys)
configuration.

A sample consumer would look as follows:

```ruby
class MyConsumer < Deimos::ActiveRecordConsumer

  schema 'MySchema'
  key_config field: 'my_field'
  record_class Widget

  # Optional override of the default behavior, which is to call `destroy`
  # on the record - e.g. you can replace this with "archiving" the record
  # in some way.
  def destroy_record(record)
    super
  end

  # Optional override to change the attributes of the record before they
  # are saved.
  def record_attributes(payload)
    super.merge(:some_field => 'some_value')
  end
end
```

## Running Consumers

Deimos includes a rake task. Once it's in your Gemfile, just run

    rake deimos:start

This will automatically set an environment variable called `DEIMOS_RAKE_TASK`,
which can be useful if you want to figure out if you're inside the task
as opposed to running your Rails server or console. E.g. you could start your
DB backend only when your rake task is running.
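
For example, a sketch of an initializer that uses the flag:

```ruby
# e.g. in config/initializers/deimos.rb (sketch):
Deimos.start_db_backend! if ENV['DEIMOS_RAKE_TASK']
```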

# Metrics

Deimos includes some metrics reporting out of the box. It ships with DataDog support, but you can add custom metric providers as well.

The following metrics are reported:
* `consumer_lag` - for each partition, the number of messages
  it's behind the tail of the partition (a gauge). This is only sent if
  `config.report_lag` is set to true.
* `handler` - a count of the number of messages received. Tagged
  with the following:
    * `topic:{topic_name}`
    * `status:received`
    * `status:success`
    * `status:error`
    * `time:consume` (histogram)
    * `time:time_delayed` (histogram)
* `publish` - a count of the number of messages published. Tagged
  with `topic:{topic_name}`
* `publish_error` - a count of the number of messages which failed
  to publish. Tagged with `topic:{topic_name}`
* `pending_db_messages_max_wait` - the number of seconds which the
  oldest KafkaMessage in the database has been waiting for, for use
  with the database backend.

### Configuring Metrics Providers

See the `# Configure the metrics provider` section under [Configuration](#configuration).
View all available metrics providers [here](lib/deimos/metrics).

### Custom Metrics Providers

Using the above configuration, it is possible to pass in any generic metrics
provider class as long as it exposes the methods and definitions expected by
the Metrics module.
The easiest way to do this is to inherit from the `Metrics::Provider` class
and implement the methods in it, as sketched below.
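
A sketch of a custom provider. The method names (`increment`, `gauge`, `histogram`, `time`) mirror the bundled Datadog/Mock providers, and `MyStatsClient` is a hypothetical stand-in for whatever stats client you actually use:

```ruby
class MyMetricsProvider < Deimos::Metrics::Provider
  def initialize(config)
    @client = MyStatsClient.new(config) # hypothetical underlying client
  end

  def increment(metric_name, options={})
    @client.count(metric_name, 1, options[:tags])
  end

  def gauge(metric_name, count, options={})
    @client.gauge(metric_name, count, options[:tags])
  end

  def histogram(metric_name, count, options={})
    @client.histogram(metric_name, count, options[:tags])
  end

  def time(metric_name, options={})
    start = Time.now
    result = yield
    @client.histogram(metric_name, Time.now - start, options[:tags])
    result
  end
end
```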

See the [Mock provider](lib/deimos/metrics/mock.rb) as an example. It implements a constructor which receives config, plus the required metrics methods.

Also see [deimos.rb](lib/deimos.rb) under `Configure metrics` to see how the metrics module is called.

# Tracing

Deimos also includes some tracing for Kafka consumers. It ships with
DataDog support, but you can add custom tracing providers as well.

One trace span covers the Avro decoding of incoming messages, and a
separate span covers the message consume logic.

### Configuring Tracing Providers

See the `# Configure the tracing provider` section under [Configuration](#configuration).
View all available tracing providers [here](lib/deimos/tracing).

### Custom Tracing Providers

Using the above configuration, it is possible to pass in any generic tracing
provider class as long as it exposes the methods and definitions expected by
the Tracing module.
The easiest way to do this is to inherit from the `Tracing::Provider` class
and implement the methods in it, as sketched below.
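
A sketch, assuming the provider interface matches the bundled Mock provider (`start`/`finish`/`set_error`); `MyTracerClient` and its methods are hypothetical:

```ruby
class MyTracingProvider < Deimos::Tracing::Provider
  def initialize(config)
    @tracer = MyTracerClient.new(config) # hypothetical tracing client
  end

  def start(span_name, options={})
    @tracer.start_span(span_name, resource: options[:resource])
  end

  def finish(span)
    span.finish
  end

  def set_error(span, exception)
    span.record_error(exception)
    span.finish
  end
end
```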

See the [Mock provider](lib/deimos/tracing/mock.rb) as an example. It implements a constructor which receives config, plus the required tracing methods.

Also see [deimos.rb](lib/deimos.rb) under `Configure tracing` to see how the tracing module is called.

# Testing

Deimos comes with a test helper class which automatically stubs out
external calls (like metrics and tracing providers and the schema
registry) and provides useful methods for testing consumers.

In `spec_helper.rb`:
```ruby
RSpec.configure do |config|
  config.include Deimos::TestHelpers
  config.before(:each) do
    stub_producers_and_consumers!
  end
end
```

In your test, you now have the following methods available:
```ruby
# Pass a consumer class (not instance) to validate a payload against it.
# This will fail if the payload does not match the schema the consumer
# is set up to consume.
test_consume_message(MyConsumer,
                     { 'some-payload' => 'some-value' }) do |payload, metadata|
  # do some expectation handling here
end

# You can also pass a topic name instead of the consumer class as long
# as the topic is configured in your phobos.yml configuration:
test_consume_message('my-topic-name',
                     { 'some-payload' => 'some-value' }) do |payload, metadata|
  # do some expectation handling here
end

# Alternatively, you can test the actual consume logic:
test_consume_message(MyConsumer,
                     { 'some-payload' => 'some-value' },
                     call_original: true)

# Test that a given payload is invalid against the schema:
test_consume_invalid_message(MyConsumer,
                             { 'some-invalid-payload' => 'some-value' })

# A matcher which allows you to test that a message was sent on the given
# topic, without having to know which class produced it.
expect(topic_name).to have_sent(payload, key=nil)

# Inspect sent messages
message = Deimos::TestHelpers.sent_messages[0]
expect(message).to eq({
  message: {'some-key' => 'some-value'},
  topic: 'my-topic',
  key: 'my-id'
})
```
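
Putting those together, a full spec might look like this (a sketch; the producer, topic, and `some_object` are the illustrative names from the Producers section):

```ruby
RSpec.describe MyProducer do
  it 'sends the expected payload' do
    MyProducer.send_some_message(some_object)
    expect('MyApp.MyTopic').to have_sent(
      'some-key' => some_object.foo,
      'some-key2' => some_object.bar
    )
  end
end
```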

**Important note:** To use the `have_sent` helper, your producers need to be
loaded / required *before* starting the test. You can do this in your
`spec_helper` file, or if you are defining producers dynamically, you can
add an `RSpec.prepend_before(:each)` block where you define the producer.
Alternatively, you can use the `stub_producer` and `stub_consumer` methods
in your test.

There is also a helper method that will let you test if an existing schema
would be compatible with a new version of it. You can use this in your
Ruby console but it would likely not be part of your RSpec test:

```ruby
require 'deimos/test_helpers'
# Can pass a file path, a string or a hash into this:
Deimos::TestHelpers.schemas_compatible?(schema1, schema2)
```

### Integration Test Helpers

You can use the `InlineConsumer` class to help with integration testing,
with a full external Kafka running.

If you have a consumer you want to test against messages in a Kafka topic,
use the `consume` method:
```ruby
Deimos::Utils::InlineConsumer.consume(
  topic: 'my-topic',
  frk_consumer: MyConsumerClass,
  num_messages: 5
)
```

This is a _synchronous_ call which will run the consumer against the
last 5 messages in the topic. You can set `num_messages` to a number
like `1_000_000` to always consume all the messages. Once the last
message is retrieved, the process will wait 1 second to make sure
they're all done, then continue execution.

If you just want to retrieve the contents of a topic, you can use
the `get_messages_for` method:

```ruby
Deimos::Utils::InlineConsumer.get_messages_for(
  topic: 'my-topic',
  schema: 'my-schema',
  namespace: 'my.namespace',
  key_config: { field: 'id' },
  num_messages: 5
)
```

This will run the process and simply return the last 5 messages on the
topic, as hashes, once it's done. The format of the messages will be
```ruby
{
  payload: { key: value }, # payload hash here
  key: "some_value" # key value or hash here
}
```

Both payload and key will be Avro-decoded as necessary according to the
key config.

You can also just pass an existing producer or consumer class into the method,
and it will extract the necessary configuration from it:

```ruby
Deimos::Utils::InlineConsumer.get_messages_for(
  topic: 'my-topic',
  config_class: MyProducerClass,
  num_messages: 5
)
```

## Contributing

Bug reports and pull requests are welcome on GitHub at https://github.com/flipp-oss/deimos.

### Linting

Deimos uses Rubocop to lint the code. Please run Rubocop on your code
before submitting a PR.

---
<p align="center">
  Sponsored by<br/>
  <a href="https://corp.flipp.com/">
    <img src="support/flipp-logo.png" title="Flipp logo" style="border:none;"/>
  </a>
</p>