deimos-ruby 1.24.2 → 2.0.0.pre.alpha1

Files changed (120)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +0 -17
  3. data/.tool-versions +1 -0
  4. data/CHANGELOG.md +5 -0
  5. data/README.md +287 -498
  6. data/deimos-ruby.gemspec +4 -4
  7. data/docs/CONFIGURATION.md +133 -226
  8. data/docs/UPGRADING.md +237 -0
  9. data/lib/deimos/active_record_consume/batch_consumption.rb +29 -28
  10. data/lib/deimos/active_record_consume/mass_updater.rb +59 -4
  11. data/lib/deimos/active_record_consume/message_consumption.rb +15 -21
  12. data/lib/deimos/active_record_consumer.rb +36 -21
  13. data/lib/deimos/active_record_producer.rb +28 -9
  14. data/lib/deimos/backends/base.rb +4 -35
  15. data/lib/deimos/backends/kafka.rb +6 -22
  16. data/lib/deimos/backends/kafka_async.rb +6 -22
  17. data/lib/deimos/backends/{db.rb → outbox.rb} +13 -9
  18. data/lib/deimos/config/configuration.rb +116 -379
  19. data/lib/deimos/consume/batch_consumption.rb +24 -124
  20. data/lib/deimos/consume/message_consumption.rb +36 -63
  21. data/lib/deimos/consumer.rb +16 -75
  22. data/lib/deimos/ext/consumer_route.rb +35 -0
  23. data/lib/deimos/ext/producer_middleware.rb +94 -0
  24. data/lib/deimos/ext/producer_route.rb +22 -0
  25. data/lib/deimos/ext/redraw.rb +29 -0
  26. data/lib/deimos/ext/routing_defaults.rb +72 -0
  27. data/lib/deimos/ext/schema_route.rb +70 -0
  28. data/lib/deimos/kafka_message.rb +2 -2
  29. data/lib/deimos/kafka_source.rb +2 -7
  30. data/lib/deimos/kafka_topic_info.rb +1 -1
  31. data/lib/deimos/logging.rb +71 -0
  32. data/lib/deimos/message.rb +2 -11
  33. data/lib/deimos/metrics/datadog.rb +40 -1
  34. data/lib/deimos/metrics/provider.rb +4 -4
  35. data/lib/deimos/producer.rb +39 -116
  36. data/lib/deimos/railtie.rb +6 -0
  37. data/lib/deimos/schema_backends/avro_base.rb +21 -21
  38. data/lib/deimos/schema_backends/avro_schema_registry.rb +1 -2
  39. data/lib/deimos/schema_backends/avro_validation.rb +2 -2
  40. data/lib/deimos/schema_backends/base.rb +19 -12
  41. data/lib/deimos/schema_backends/mock.rb +6 -1
  42. data/lib/deimos/schema_backends/plain.rb +47 -0
  43. data/lib/deimos/schema_class/base.rb +2 -2
  44. data/lib/deimos/schema_class/enum.rb +1 -1
  45. data/lib/deimos/schema_class/record.rb +2 -2
  46. data/lib/deimos/test_helpers.rb +95 -320
  47. data/lib/deimos/tracing/provider.rb +6 -6
  48. data/lib/deimos/transcoder.rb +88 -0
  49. data/lib/deimos/utils/db_poller/base.rb +16 -14
  50. data/lib/deimos/utils/db_poller/state_based.rb +3 -3
  51. data/lib/deimos/utils/db_poller/time_based.rb +4 -4
  52. data/lib/deimos/utils/db_poller.rb +1 -1
  53. data/lib/deimos/utils/deadlock_retry.rb +1 -1
  54. data/lib/deimos/utils/{db_producer.rb → outbox_producer.rb} +16 -47
  55. data/lib/deimos/utils/schema_class.rb +0 -7
  56. data/lib/deimos/version.rb +1 -1
  57. data/lib/deimos.rb +79 -26
  58. data/lib/generators/deimos/{db_backend_generator.rb → outbox_backend_generator.rb} +4 -4
  59. data/lib/generators/deimos/schema_class_generator.rb +0 -1
  60. data/lib/generators/deimos/v2/templates/karafka.rb.tt +149 -0
  61. data/lib/generators/deimos/v2_generator.rb +193 -0
  62. data/lib/tasks/deimos.rake +5 -7
  63. data/spec/active_record_batch_consumer_association_spec.rb +22 -13
  64. data/spec/active_record_batch_consumer_spec.rb +84 -65
  65. data/spec/active_record_consume/batch_consumption_spec.rb +10 -10
  66. data/spec/active_record_consume/batch_slicer_spec.rb +12 -12
  67. data/spec/active_record_consume/mass_updater_spec.rb +137 -0
  68. data/spec/active_record_consumer_spec.rb +29 -13
  69. data/spec/active_record_producer_spec.rb +36 -26
  70. data/spec/backends/base_spec.rb +0 -23
  71. data/spec/backends/kafka_async_spec.rb +1 -3
  72. data/spec/backends/kafka_spec.rb +1 -3
  73. data/spec/backends/{db_spec.rb → outbox_spec.rb} +14 -20
  74. data/spec/batch_consumer_spec.rb +66 -116
  75. data/spec/consumer_spec.rb +53 -147
  76. data/spec/deimos_spec.rb +10 -126
  77. data/spec/kafka_source_spec.rb +19 -52
  78. data/spec/karafka/karafka.rb +69 -0
  79. data/spec/karafka_config/karafka_spec.rb +97 -0
  80. data/spec/logging_spec.rb +25 -0
  81. data/spec/message_spec.rb +9 -9
  82. data/spec/producer_spec.rb +112 -254
  83. data/spec/rake_spec.rb +1 -3
  84. data/spec/schema_backends/avro_validation_spec.rb +1 -1
  85. data/spec/schemas/com/my-namespace/MySchemaWithTitle.avsc +22 -0
  86. data/spec/snapshots/consumers-no-nest.snap +49 -0
  87. data/spec/snapshots/consumers.snap +49 -0
  88. data/spec/snapshots/consumers_and_producers-no-nest.snap +49 -0
  89. data/spec/snapshots/consumers_and_producers.snap +49 -0
  90. data/spec/snapshots/consumers_circular-no-nest.snap +49 -0
  91. data/spec/snapshots/consumers_circular.snap +49 -0
  92. data/spec/snapshots/consumers_complex_types-no-nest.snap +49 -0
  93. data/spec/snapshots/consumers_complex_types.snap +49 -0
  94. data/spec/snapshots/consumers_nested-no-nest.snap +49 -0
  95. data/spec/snapshots/consumers_nested.snap +49 -0
  96. data/spec/snapshots/namespace_folders.snap +49 -0
  97. data/spec/snapshots/namespace_map.snap +49 -0
  98. data/spec/snapshots/producers_with_key-no-nest.snap +49 -0
  99. data/spec/snapshots/producers_with_key.snap +49 -0
  100. data/spec/spec_helper.rb +61 -29
  101. data/spec/utils/db_poller_spec.rb +49 -39
  102. data/spec/utils/{db_producer_spec.rb → outbox_producer_spec.rb} +17 -184
  103. metadata +58 -67
  104. data/lib/deimos/batch_consumer.rb +0 -7
  105. data/lib/deimos/config/phobos_config.rb +0 -163
  106. data/lib/deimos/instrumentation.rb +0 -95
  107. data/lib/deimos/monkey_patches/phobos_cli.rb +0 -35
  108. data/lib/deimos/utils/inline_consumer.rb +0 -158
  109. data/lib/deimos/utils/lag_reporter.rb +0 -186
  110. data/lib/deimos/utils/schema_controller_mixin.rb +0 -129
  111. data/spec/config/configuration_spec.rb +0 -321
  112. data/spec/kafka_listener_spec.rb +0 -55
  113. data/spec/phobos.bad_db.yml +0 -73
  114. data/spec/phobos.yml +0 -77
  115. data/spec/utils/inline_consumer_spec.rb +0 -31
  116. data/spec/utils/lag_reporter_spec.rb +0 -76
  117. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  118. data/spec/utils/schema_controller_mixin_spec.rb +0 -84
  119. /data/lib/generators/deimos/{db_backend → outbox_backend}/templates/migration +0 -0
  120. /data/lib/generators/deimos/{db_backend → outbox_backend}/templates/rails3_migration +0 -0
data/deimos-ruby.gemspec CHANGED
@@ -19,9 +19,8 @@ Gem::Specification.new do |spec|
  spec.require_paths = ['lib']

  spec.add_runtime_dependency('avro_turf', '>= 1.4', '< 2')
- spec.add_runtime_dependency('fig_tree', '~> 0.0.2')
- spec.add_runtime_dependency('phobos', '>= 1.9', '< 3.0')
- spec.add_runtime_dependency('ruby-kafka', '< 2')
+ spec.add_runtime_dependency('karafka', '~> 2.0')
+ spec.add_runtime_dependency('fig_tree', '~> 0.2.0')
  spec.add_runtime_dependency('sigurd', '>= 0.1.0', '< 1.0')

  spec.add_development_dependency('activerecord-import')
@@ -33,7 +32,8 @@ Gem::Specification.new do |spec|
  spec.add_development_dependency('guard', '~> 2')
  spec.add_development_dependency('guard-rspec', '~> 4')
  spec.add_development_dependency('guard-rubocop', '~> 1')
- spec.add_development_dependency('mysql2', '~> 0.5')
+ spec.add_development_dependency('karafka-testing', '~> 2.0')
+ spec.add_development_dependency('trilogy', '>= 0.1')
  spec.add_development_dependency('pg', '~> 1.1')
  spec.add_development_dependency('rails', '~> 6')
  spec.add_development_dependency('rake', '~> 13')
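For upgraders, the dependency swap above means Bundler will now resolve `karafka` instead of `phobos`/`ruby-kafka`. A minimal Gemfile sketch (the version pins are illustrative, not prescribed by the gem):

```ruby
# Gemfile -- illustrative pins for trying the pre-release
source 'https://rubygems.org'

gem 'deimos-ruby', '2.0.0.pre.alpha1'
# karafka ~> 2.0 and fig_tree ~> 0.2 come in as runtime dependencies;
# phobos and ruby-kafka disappear from the lockfile entirely.
```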
data/docs/CONFIGURATION.md CHANGED
@@ -1,109 +1,78 @@
  # Configuration

- Deimos supports a succinct, readable syntax which uses
- pure Ruby to allow flexible configuration.
+ Deimos has two methods of configuration:
+
+ * Main Deimos configuration, which uses the [FigTree](https://github.com/flipp-oss/fig_tree) gem for its own settings.
+ * Karafka routing configuration, which adds extensions to existing [Karafka routes](https://karafka.io/docs/Routing/).
+
+ The majority of application configuration, including Kafka and `librdkafka` settings, is part of existing [Karafka configuration](https://karafka.io/docs/Configuration/).
+
+ ## Main Configuration

  You can access any configuration value via a simple `Deimos.config.whatever`.

- Nested configuration is denoted in simple dot notation:
- `kafka.ssl.enabled`. Headings below will follow the nested
- configurations.
+ Nested configuration is denoted in simple dot notation: `schema.path`. Headings below will follow the nested configurations.

- ## Base Configuration
- Config name| Default |Description
- -----------|-----------------------------|-----------
- logger| `Logger.new(STDOUT)` |The logger that Deimos will use.
- payload_log| `:full` |Determines how much data is logged per payload.</br>`:full` - all keys and payloads are logged.</br>`:keys` - only keys are logged.</br>`:count` - only the total count of messages are logged.
- phobos_logger| `Deimos.config.logger` |The logger passed to Phobos.
- metrics| `Deimos::Metrics::Mock.new` |The metrics backend use for reporting.
- tracer| `Deimos::Tracing::Mock.new` |The tracer backend used for debugging.
+ ### Configuration Syntax

- ## Defining Producers
+ Sample:

- You can define a new producer thusly:
  ```ruby
  Deimos.configure do
- producer do
- class_name 'MyProducer'
- topic 'MyTopic'
- schema 'MyTopicSchema'
- namespace 'my.namespace'
- key_config field: :id
-
- # If config.schema.path is app/schemas, assumes there is a file in
- # app/schemas/my/namespace/MyTopicSchema.avsc
+ metrics { Deimos::Metrics::Datadog.new({host: 'localhost'}) }
+ schema.path "#{Rails.root}/app/schemas"
+
+ # Multiple nested config fields via block
+ consumers do
+ session_timeout 30
+ offset_commit_interval 10
  end
  end
  ```

- You can have as many `producer` blocks as you like to define more producers.
+ ### Base Configuration

- Config name|Default|Description
- -----------|-------|-----------
- class_name|nil|Class name of the producer class (subclass of `Deimos::Producer`.)
- topic|nil|Topic to produce to.
- schema|nil|Name of the schema to use to encode data before producing.
- namespace|nil|Namespace of the schema to use when finding it locally.
- key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
- use_schema_classes|nil|Set to true or false to enable or disable using the producers schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes)
+ | Config name | Default | Description |
+ |-------------|-----------------------------|------------------------------------------|
+ | metrics | `Deimos::Metrics::Mock.new` | The metrics backend used for reporting. |
+ | tracer | `Deimos::Tracing::Mock.new` | The tracer backend used for debugging. |

- ## Defining Consumers
+ Note that all blocks are evaluated in the context of the configuration object.
+ If you're calling this inside another class or method, you'll need to save
+ things you need to reference into local variables before calling `configure`.
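To make that caveat concrete, here is a minimal sketch; the class and variable names are illustrative, not part of the gem:

```ruby
class DeimosSetup
  def self.run(statsd_host)
    # Inside `configure`, `self` is the config object, so methods and ivars
    # of DeimosSetup are not visible there -- capture them in a local first.
    host = statsd_host
    Deimos.configure do
      metrics { Deimos::Metrics::Datadog.new({ host: host }) }
    end
  end
end
```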

- Consumers are defined almost identically to producers:
+ ### Producer Configuration

- ```ruby
- Deimos.configure do
- consumer do
- class_name 'MyConsumer'
- topic 'MyTopic'
- schema 'MyTopicSchema'
- namespace 'my.namespace'
- key_config field: :id
+ | Config name | Default | Description |
+ |------------------------|----------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | producers.topic_prefix | nil | Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics. |
+ | producers.disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. |
+ | producers.backend | `:kafka_async` | Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers. |
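A hedged sketch of switching the backend per environment, using only the settings in the table above (the `Rails.env` check and prefix value are illustrative):

```ruby
Deimos.configure do
  # Async batching in the web tier; sync :kafka is safer in consumers or
  # delayed workers, where losing a buffered message on crash is unacceptable.
  producers.backend(Rails.env.production? ? :kafka_async : :kafka)
  producers.topic_prefix 'staging.' unless Rails.env.production?
end
```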

- # Setting to :inline_batch will invoke consume_batch instead of consume
- # for each batch of messages.
- delivery :batch
+ ### Schema Configuration

- # If config.schema.path is app/schemas, assumes there is a file in
- # app/schemas/my/namespace/MyTopicSchema.avsc
- end
- end
- ```
+ | Config name | Default | Description |
+ |-----------------------------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | schema.backend | `:mock` | Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends). |
+ | schema.registry_url | `http://localhost:8081` | URL of the Confluent schema registry. |
+ | schema.user | nil | Basic auth user. |
+ | schema.password | nil | Basic auth password. |
+ | schema.path | nil | Local path to find your schemas. |
+ | schema.use_schema_classes | false | Set this to true to use generated schema classes in your application. |
+ | schema.generated_class_path | `app/lib/schema_classes` | Local path to generated schema classes. |
+ | schema.nest_child_schemas | false | Set to true to nest subschemas within the generated class for the parent schema. |
+ | schema.use_full_namespace | false | Set to true to generate folders for schemas matching the full namespace. |
+ | schema.schema_namespace_map | {} | A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true. |
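A hedged configuration sketch combining the schema settings above (the registry URL and paths are illustrative; `:avro_schema_registry` names the backend file listed in `lib/deimos/schema_backends`):

```ruby
Deimos.configure do
  schema.backend :avro_schema_registry      # encode/decode via the Confluent registry
  schema.registry_url 'http://localhost:8081'
  schema.path "#{Rails.root}/app/schemas"
  schema.use_schema_classes true
  schema.generated_class_path 'app/lib/schema_classes'
end
```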
+
+ ### Outbox Configuration
+
+ | Config name | Default | Description |
+ |-----------------------|------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | outbox.logger | `Deimos.config.logger` | Logger to use inside the outbox producer. |
+ | outbox.log_topics | `[]` | List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry. |
+ | outbox.compact_topics | `[]` | List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on, but only within a single batch. You can also specify `:all` to compact all topics. |
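A hedged sketch of these settings in use, assuming the `outbox` settings nest in a block the same way the other FigTree sections do (topic name is illustrative):

```ruby
Deimos.configure do
  outbox do
    log_topics ['my-topic']   # decode and print full messages for this topic
    compact_topics :all       # keep only the last message per key in each batch
  end
end
```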

- In addition to the producer configs, you can define a number of overrides
- to the basic consumer configuration for each consumer. This is analogous to
- the `listener` config in `phobos.yml`.
-
- Config name|Default|Description
- -----------|-------|-----------
- class_name|nil|Class name of the consumer class (subclass of `Deimos::Consumer`.)
- topic|nil|Topic to produce to.
- schema|nil|This is optional but strongly recommended for testing purposes; this will validate against a local schema file used as the reader schema, as well as being able to write tests against this schema. This is recommended since it ensures you are always getting the values you expect.
- namespace|nil|Namespace of the schema to use when finding it locally.
- key_config|nil|Configuration hash for message keys. See [Kafka Message Keys](../README.md#installation)
- disabled|false|Set to true to skip starting an actual listener for this consumer on startup.
- group_id|nil|ID of the consumer group.
- use_schema_classes|nil|Set to true or false to enable or disable using the consumers schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes)
- bulk_import_id_column|:bulk_import_id|Name of the column to use for multi-table imports.
- replace_associations|true|If false, append to associations in multi-table imports rather than replacing them.
- max_db_batch_size|nil|Maximum limit for batching database calls to reduce the load on the db.
- max_concurrency|1|Number of threads created for this listener. Each thread will behave as an independent consumer. They don't share any state.
- start_from_beginning|true|Once the consumer group has checkpointed its progress in the topic's partitions, the consumers will always start from the checkpointed offsets, regardless of config. As such, this setting only applies when the consumer initially starts consuming from a topic
- max_bytes_per_partition|512.kilobytes|Maximum amount of data fetched from a single partition at a time.
- min_bytes|1|Minimum number of bytes to read before returning messages from the server; if `max_wait_time` is reached, this is ignored.
- max_wait_time|5|Maximum duration of time to wait before returning messages from the server, in seconds.
- force_encoding|nil|Apply this encoding to the message payload. If blank it uses the original encoding. This property accepts values defined by the ruby Encoding class (https://ruby-doc.org/core-2.3.0/Encoding.html). Ex: UTF_8, ASCII_8BIT, etc.
- delivery|`:batch`|The delivery mode for the consumer. Possible values: `:message, :batch, :inline_batch`. See Phobos documentation for more details.
- session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
- offset_commit_interval|10|Interval between offset commits, in seconds.
- offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
- offset_retention_time|nil|The time period that committed offsets will be retained, in seconds. Defaults to the broker setting.
- heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
- backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
- replace_associations|nil| Whether to delete existing associations for records during bulk consumption for this consumer. If no value is specified the provided/default value from the `consumers` configuration will be used.
- bulk_import_id_generator|nil| Block to determine the `bulk_import_id` generated during bulk consumption. If no block is specified the provided/default block from the `consumers` configuration will be used.
-
- ## Defining Database Pollers
+ ### Defining Database Pollers

  These are used when polling the database via `rake deimos:db_poller`. You
  can create a number of pollers, one per topic.
@@ -117,156 +86,94 @@ Deimos.configure do
  end
  ```

- Config name|Default|Description
- -----------|-------|-----------
- producer_class|nil|ActiveRecordProducer class to use for sending messages.
- mode|:time_based|Whether to use time-based polling or state-based polling.
- run_every|60|Amount of time in seconds to wait between runs.
- timestamp_column|`:updated_at`|Name of the column to query. Remember to add an index to this column!
- delay_time|2|Amount of time in seconds to wait before picking up records, to allow for transactions to finish.
- retries|1|The number of times to retry for a *non-Kafka* error.
- full_table|false|If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only.
- start_from_beginning|true|If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only.
- state_column|nil|If set, this represents the DB column to use to update publishing status. State-based only.
- publish_timestamp_column|nil|If set, this represents the DB column to use to update when publishing is done. State-based only.
- published_state|nil|If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only.
- failed_state|nil|If set, the poller will update the `state_column` to this value when publishing fails. State-based only.
- poller_class|nil|Inherited poller class name to use for publishing to multiple kafka topics from a single poller.
-
- ## Kafka Configuration
-
- Config name|Default|Description
- -----------|-------|-----------
- kafka.logger|`Deimos.config.logger`|Logger passed to RubyKafka.
- kafka.seed_brokers|`['localhost:9092']`|URL for the Kafka brokers.
- kafka.client_id|`phobos`|Identifier for this application.
- kafka.connect_timeout|15|The socket timeout for connecting to the broker, in seconds.
- kafka.socket_timeout|15|The socket timeout for reading and writing to the broker, in seconds.
- kafka.ssl.enabled|false|Whether SSL is enabled on the brokers.
- kafka.ssl.ca_certs_from_system|false|Use CA certs from system.
- kafka.ssl.ca_cert|nil| A PEM encoded CA cert, a file path to the cert, or an Array of certs to use with an SSL connection.
- kafka.ssl.client_cert|nil|A PEM encoded client cert to use with an SSL connection, or a file path to the cert.
- kafka.ssl.client_cert_key|nil|A PEM encoded client cert key to use with an SSL connection.
- kafka.sasl.enabled|false|Whether SASL is enabled on the brokers.
- kafka.sasl.gssapi_principal|nil|A KRB5 principal.
- kafka.sasl.gssapi_keytab|nil|A KRB5 keytab filepath.
- kafka.sasl.plain_authzid|nil|Plain authorization ID.
- kafka.sasl.plain_username|nil|Plain username.
- kafka.sasl.plain_password|nil|Plain password.
- kafka.sasl.scram_username|nil|SCRAM username.
- kafka.sasl.scram_password|nil|SCRAM password.
- kafka.sasl.scram_mechanism|nil|Scram mechanism, either "sha256" or "sha512".
- kafka.sasl.enforce_ssl|nil|Whether to enforce SSL with SASL.
- kafka.sasl.oauth_token_provider|nil|OAuthBearer Token Provider instance that implements method token. See {Sasl::OAuth#initialize}.
-
- ## Consumer Configuration
-
- These are top-level configuration settings, but they can be overridden
- by individual consumers.
-
- Config name|Default|Description
- -----------|-------|-----------
- consumers.session_timeout|300|Number of seconds after which, if a client hasn't contacted the Kafka cluster, it will be kicked out of the group.
- consumers.offset_commit_interval|10|Interval between offset commits, in seconds.
- consumers.offset_commit_threshold|0|Number of messages that can be processed before their offsets are committed. If zero, offset commits are not triggered by message processing
- consumers.heartbeat_interval|10|Interval between heartbeats; must be less than the session window.
- consumers.backoff|`(1000..60_000)`|Range representing the minimum and maximum number of milliseconds to back off after a consumer error.
- consumers.reraise_errors|false|Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. This is automatically set to true when using the `TestHelpers` module in RSpec.
- consumers.report_lag|false|Whether to send the `consumer_lag` metric. This requires an extra thread per consumer.
- consumers.fatal_error|`proc { false }`|Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is available. Not needed if reraise_errors is set to true.
- consumers.replace_associations|true|Whether to delete existing associations for records during bulk consumption prior to inserting new associated records
- consumers.bulk_import_id_generator|`proc { SecureRandom.uuid }`| Block to determine the `bulk_import_id` generated during bulk consumption. Block will be used for all bulk consumers unless explicitly set for individual consumers
-
- ## Producer Configuration
-
- Config name|Default|Description
- -----------|-------|-----------
- producers.ack_timeout|5|Number of seconds a broker can wait for replicas to acknowledge a write before responding with a timeout.
- producers.required_acks|1|Number of replicas that must acknowledge a write, or `:all` if all in-sync replicas must acknowledge.
- producers.max_retries|2|Number of retries that should be attempted before giving up sending messages to the cluster. Does not include the original attempt.
- producers.retry_backoff|1|Number of seconds to wait between retries.
- producers.max_buffer_size|10_000|Number of messages allowed in the buffer before new writes will raise `BufferOverflow` exceptions.
- producers.max_buffer_bytesize|10_000_000|Maximum size of the buffer in bytes. Attempting to produce messages when the buffer reaches this size will result in `BufferOverflow` being raised.
- producers.compression_codec|nil|Name of the compression codec to use, or nil if no compression should be performed. Valid codecs: `:snappy` and `:gzip`
- producers.compression_threshold|1|Number of messages that needs to be in a message set before it should be compressed. Note that message sets are per-partition rather than per-topic or per-producer.
- producers.max_queue_size|10_000|Maximum number of messages allowed in the queue. Only used for async_producer.
- producers.delivery_threshold|0|If greater than zero, the number of buffered messages that will automatically trigger a delivery. Only used for async_producer.
- producers.delivery_interval|0|if greater than zero, the number of seconds between automatic message deliveries. Only used for async_producer.
- producers.persistent_connections|false|Set this to true to keep the producer connection between publish calls. This can speed up subsequent messages by around 30%, but it does mean that you need to manually call sync_producer_shutdown before exiting, similar to async_producer_shutdown.
- producers.schema_namespace|nil|Default namespace for all producers. Can remain nil. Individual producers can override.
- producers.topic_prefix|nil|Add a prefix to all topic names. This can be useful if you're using the same Kafka broker for different environments that are producing the same topics.
- producers.disabled|false|Disable all actual message producing. Generally more useful to use the `disable_producers` method instead.
- producers.backend|`:kafka_async`|Currently can be set to `:db`, `:kafka`, or `:kafka_async`. If using Kafka directly, a good pattern is to set to async in your user-facing app, and sync in your consumers or delayed workers.
-
- ## Schema Configuration
-
- Config name|Default|Description
- -----------|-------|-----------
- schema.backend|`:mock`|Backend representing the schema encoder/decoder. You can see a full list [here](../lib/deimos/schema_backends).
- schema.registry_url|`http://localhost:8081`|URL of the Confluent schema registry.
- schema.user|nil|Basic auth user.
- schema.password|nil|Basic auth password.
- schema.path|nil|Local path to find your schemas.
- schema.use_schema_classes|false|Set this to true to use generated schema classes in your application.
- schema.generated_class_path|`app/lib/schema_classes`|Local path to generated schema classes.
- schema.nest_child_schemas|false|Set to true to nest subschemas within the generated class for the parent schema.
- schema.use_full_namespace|false|Set to true to generate folders for schemas matching the full namespace.
- schema.schema_namespace_map|{}|A map of namespace prefixes to base module name(s). Example: { 'com.mycompany.suborg' => ['SchemaClasses'] }. Requires `use_full_namespace` to be true.
-
- ## Database Producer Configuration
-
- Config name|Default|Description
- -----------|-------|-----------
- db_producer.logger|`Deimos.config.logger`|Logger to use inside the DB producer.
- db_producer.log_topics|`[]`|List of topics to print full messages for, or `:all` to print all topics. This can introduce slowdown since it needs to decode each message using the schema registry.
- db_producer.compact_topics|`[]`|List of topics to compact before sending, i.e. only send the last message with any given key in a batch. This is an optimization which mirrors what Kafka itself will do with compaction turned on but only within a single batch. You can also specify `:all` to compact all topics.
-
- ## Configuration Syntax
-
- Sample:
-
+ | Config name | Default | Description |
+ |--------------------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------|
+ | producer_class | nil | ActiveRecordProducer class to use for sending messages. |
+ | mode | :time_based | Whether to use time-based polling or state-based polling. |
+ | run_every | 60 | Amount of time in seconds to wait between runs. |
+ | timestamp_column | `:updated_at` | Name of the column to query. Remember to add an index to this column! |
+ | delay_time | 2 | Amount of time in seconds to wait before picking up records, to allow for transactions to finish. |
+ | retries | 1 | The number of times to retry for a *non-Kafka* error. |
+ | full_table | false | If set to true, do a full table dump to Kafka each run. Good for very small tables. Time-based only. |
+ | start_from_beginning | true | If false, start from the current time instead of the beginning of time if this is the first time running the poller. Time-based only. |
+ | state_column | nil | If set, this represents the DB column to use to update publishing status. State-based only. |
+ | publish_timestamp_column | nil | If set, this represents the DB column to use to update when publishing is done. State-based only. |
+ | published_state | nil | If set, the poller will update the `state_column` to this value when publishing succeeds. State-based only. |
+ | failed_state | nil | If set, the poller will update the `state_column` to this value when publishing fails. State-based only. |
+ | poller_class | nil | Poller subclass name to use for publishing to multiple kafka topics from a single poller. |
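A hedged sketch of a poller definition, assuming the `db_poller` block name carried over from Deimos 1.x (the producer and column names are illustrative):

```ruby
Deimos.configure do
  db_poller do
    producer_class 'MyProducer'    # an ActiveRecordProducer subclass
    mode :time_based
    run_every 60                   # seconds between runs
    timestamp_column :updated_at   # remember to index this column!
    delay_time 2                   # let in-flight transactions commit first
  end
end
```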
+
+ ## Karafka Routing
+
+ The following are additional settings that can be added to the `topic` block in Karafka routes, or to `defaults` blocks.
+
+ ### Shared Settings
+
+ | Config name | Default | Description |
+ |--------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | payload_log | :full | Determines how much data is logged per payload.<br/>`:full` - all keys and payloads are logged.<br/>`:keys` - only keys are logged.<br/>`:count` - only the total count of messages is logged. |
+ | schema | nil | Name of the schema to use to encode data before producing. |
+ | namespace | nil | Namespace of the schema to use when finding it locally. |
+ | key_config | nil | Configuration hash for message keys. See [Kafka Message Keys](../README.md#kafka-message-keys). |
+ | use_schema_classes | nil | Set to true or false to enable or disable using the producer's schema classes. See [Generated Schema Classes](../README.md#generated-schema-classes). |
+
+ ### Consumer Settings
+
+ | Config name | Default | Description |
+ |--------------------------|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ | each_message | false | If true, use `consume_message` for each message rather than `consume_batch` for the full batch. |
+ | reraise_errors | false | Default behavior is to swallow uncaught exceptions and log to the metrics provider. Set this to true to instead raise all errors. Note that raising an error will ensure that the message cannot be processed - if there is a bad message which will always raise that error, your consumer will not be able to proceed past it and will be stuck forever until you fix your code. See also the `fatal_error` configuration. |
+ | fatal_error | `proc { false }` | Block taking an exception, payload and metadata and returning true if this should be considered a fatal error and false otherwise. E.g. you can use this to always fail if the database is unavailable. Not needed if `reraise_errors` is set to true. |
+ | max_db_batch_size | nil | Maximum limit for batching database calls to reduce the load on the db. |
+ | bulk_import_id_column | `:bulk_import_id` | Name of the column to use for multi-table imports. |
+ | replace_associations | true | If false, append to associations in multi-table imports rather than replacing them. |
+ | bulk_import_id_generator | nil | Block to determine the `bulk_import_id` generated during bulk consumption. If no block is specified, the provided/default block from the consumers configuration will be used. |
+ | save_associations_first | false | Whether to save associated records of the primary class prior to upserting primary records. Foreign keys of associated records are assigned to the record class prior to saving it. |
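A hedged routing sketch combining the consumer settings above with the shared ones (the error class in the `fatal_error` proc is illustrative):

```ruby
Karafka::App.routes.draw do
  topic 'MyTopic' do
    consumer MyConsumer
    schema 'MyTopicSchema'
    namespace 'my.namespace'
    key_config field: :id
    each_message true   # implement consume_message instead of consume_batch
    # treat lost DB connections as fatal rather than swallowing them
    fatal_error(proc { |e, _payload, _metadata| e.is_a?(ActiveRecord::ConnectionNotEstablished) })
  end
end
```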
+
+ ### Defining Consumers
+
+ An example consumer:
  ```ruby
- Deimos.configure do
- logger Logger.new(STDOUT)
- # Nested config field
- kafka.seed_brokers ['my.kafka.broker:9092']
-
- # Multiple nested config fields via block
- consumers do
- session_timeout 30
- offset_commit_interval 10
+ Karafka::App.routes.draw do
+ defaults do
+ payload_log :keys
  end
-
- # Define a new producer
- producer do
- class_name 'MyProducer'
- topic 'MyTopic'
+
+ topic 'MyTopic' do
+ namespace 'my.namespace'
+ consumer MyConsumer
  schema 'MyTopicSchema'
  key_config field: :id
- end

- # Define another new producer
- producer do
- class_name 'AnotherProducer'
- topic 'AnotherTopic'
- schema 'AnotherSchema'
- key_config plain: true
+ # If config.schema.path is app/schemas, assumes there is a file in
+ # app/schemas/my/namespace/MyTopicSchema.avsc
  end
+ end
+ ```
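The route above wires the topic to `MyConsumer`. A hedged sketch of what that class might look like, assuming Karafka's batch `messages` API is exposed inside Deimos v2 consumers:

```ruby
class MyConsumer < Deimos::Consumer
  # Default mode: called once per batch. With `each_message true` in the
  # route, implement `consume_message` instead.
  def consume_batch
    messages.payloads.each do |payload|
      # Payloads are already decoded via the topic's schema.
      Rails.logger.info("Received my_id=#{payload['my_id']}")
    end
  end
end
```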

- # Define a consumer
- consumer do
- class_name 'MyConsumer'
- topic 'TopicToConsume'
- schema 'ConsumerSchema'
- key_config plain: true
- # include Phobos / RubyKafka configs
- start_from_beginning true
- heartbeat_interval 10
+ ### Producer Settings
+
+ | Config name | Default | Description |
+ |----------------|---------|-------------------------------------------------------------------------------------------------------------|
+ | producer_class | nil | Class of the producer to use for the current topic. |
+ | disabled | false | Disable all actual message producing. Generally more useful to use the `disable_producers` method instead. |
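As a hedged illustration of the `disable_producers` method mentioned in the table (the producer name and payload are illustrative):

```ruby
# Nothing is sent to Kafka inside the block; publishes become no-ops.
Deimos.disable_producers do
  MyProducer.publish({ 'test_id' => 'id1', 'some_int' => 3 })
end
```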
+
+ ### Defining Producers
+
+ You can define a new producer almost identically to consumers:
+ ```ruby
+ Karafka::App.routes.draw do
+ defaults do
+ namespace 'my.namespace'
  end
+ topic 'MyTopic' do
+ producer_class MyProducer
+ schema 'MyTopicSchema'
+ key_config field: :id
+ payload_log :count

+ # If config.schema.path is app/schemas, assumes there is a file in
+ # app/schemas/my/namespace/MyTopicSchema.avsc
+ end
  end
  ```

- Note that all blocks are evaluated in the context of the configuration object.
- If you're calling this inside another class or method, you'll need to save
- things you need to reference into local variables before calling `configure`.
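With the route above in place, publishing goes through the producer class. A hedged usage sketch, assuming Deimos's long-standing `publish`/`publish_list` class methods (payload fields are illustrative and must match `MyTopicSchema`):

```ruby
class MyProducer < Deimos::Producer
end

# Single message; encoded with the schema and keyed by the :id field per the route.
MyProducer.publish({ 'test_id' => 'id1', 'some_int' => 3 })

# Batch publish; with payload_log :count, only the message count is logged.
MyProducer.publish_list([
  { 'test_id' => 'id2', 'some_int' => 4 },
  { 'test_id' => 'id3', 'some_int' => 5 }
])
```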