deimos-ruby 1.0.0.pre.beta22

Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +32 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +752 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos/active_record_consumer.rb +81 -0
  24. data/lib/deimos/active_record_producer.rb +64 -0
  25. data/lib/deimos/avro_data_coder.rb +89 -0
  26. data/lib/deimos/avro_data_decoder.rb +36 -0
  27. data/lib/deimos/avro_data_encoder.rb +51 -0
  28. data/lib/deimos/backends/db.rb +27 -0
  29. data/lib/deimos/backends/kafka.rb +27 -0
  30. data/lib/deimos/backends/kafka_async.rb +27 -0
  31. data/lib/deimos/configuration.rb +90 -0
  32. data/lib/deimos/consumer.rb +164 -0
  33. data/lib/deimos/instrumentation.rb +71 -0
  34. data/lib/deimos/kafka_message.rb +27 -0
  35. data/lib/deimos/kafka_source.rb +126 -0
  36. data/lib/deimos/kafka_topic_info.rb +86 -0
  37. data/lib/deimos/message.rb +74 -0
  38. data/lib/deimos/metrics/datadog.rb +47 -0
  39. data/lib/deimos/metrics/mock.rb +39 -0
  40. data/lib/deimos/metrics/provider.rb +38 -0
  41. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  42. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  43. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  44. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  45. data/lib/deimos/producer.rb +218 -0
  46. data/lib/deimos/publish_backend.rb +30 -0
  47. data/lib/deimos/railtie.rb +8 -0
  48. data/lib/deimos/schema_coercer.rb +108 -0
  49. data/lib/deimos/shared_config.rb +59 -0
  50. data/lib/deimos/test_helpers.rb +356 -0
  51. data/lib/deimos/tracing/datadog.rb +35 -0
  52. data/lib/deimos/tracing/mock.rb +40 -0
  53. data/lib/deimos/tracing/provider.rb +31 -0
  54. data/lib/deimos/utils/db_producer.rb +122 -0
  55. data/lib/deimos/utils/executor.rb +117 -0
  56. data/lib/deimos/utils/inline_consumer.rb +144 -0
  57. data/lib/deimos/utils/lag_reporter.rb +182 -0
  58. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  59. data/lib/deimos/utils/signal_handler.rb +68 -0
  60. data/lib/deimos/version.rb +5 -0
  61. data/lib/deimos.rb +133 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +27 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +120 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +259 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/Rakefile ADDED
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+
+ require 'bundler/gem_tasks'
+ begin
+   require 'rspec/core/rake_task'
+
+   RSpec::Core::RakeTask.new(:spec)
+   task(default: :spec)
+ rescue LoadError # rubocop:disable Lint/HandleExceptions
+   # no rspec available
+ end
+
+ import('./lib/tasks/deimos.rake')
data/bin/deimos ADDED
@@ -0,0 +1,4 @@
+ #!/usr/bin/env ruby
+ # frozen_string_literal: true
+
+ require 'deimos'
data/deimos-kafka.gemspec ADDED
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ lib = File.expand_path('lib', __dir__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'deimos/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = 'deimos-ruby'
+   spec.version       = Deimos::VERSION
+   spec.authors       = ['Daniel Orner']
+   spec.email         = ['daniel.orner@wishabi.com']
+   spec.summary       = 'Kafka libraries for Ruby.'
+   spec.homepage      = ''
+   spec.license       = 'Apache-2.0'
+
+   spec.files         = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ['lib']
+
+   spec.add_runtime_dependency('avro-patches', '~> 0.3')
+   spec.add_runtime_dependency('avro_turf', '~> 0.8')
+   spec.add_runtime_dependency('phobos', '~> 1.8')
+   spec.add_runtime_dependency('ruby-kafka', '~> 0.7')
+
+   spec.add_development_dependency('activerecord', '~> 5.2')
+   spec.add_development_dependency('activerecord-import')
+   spec.add_development_dependency('bundler', '~> 1')
+   spec.add_development_dependency('ddtrace', '~> 0.11')
+   spec.add_development_dependency('dogstatsd-ruby', '~> 4.2')
+   spec.add_development_dependency('guard', '~> 2')
+   spec.add_development_dependency('guard-rspec', '~> 4')
+   spec.add_development_dependency('guard-rubocop', '~> 1')
+   spec.add_development_dependency('mysql2', '~> 0.5')
+   spec.add_development_dependency('pg', '~> 1.1')
+   spec.add_development_dependency('rake', '~> 10')
+   spec.add_development_dependency('rspec', '~> 3')
+   spec.add_development_dependency('rspec_junit_formatter', '~> 0.3')
+   spec.add_development_dependency('rubocop', '~> 0.72')
+   spec.add_development_dependency('rubocop-rspec', '~> 1.27')
+   spec.add_development_dependency('sqlite3', '~> 1.3')
+ end
data/docker-compose.yml ADDED
@@ -0,0 +1,71 @@
+ version: '3.6'
+ services:
+   mysql:
+     image: mysql:5.7
+     expose:
+       - 3306
+     environment:
+       - MYSQL_ALLOW_EMPTY_PASSWORD=yes
+       - MYSQL_DATABASE=test
+       - TZ=America/Toronto
+
+   postgres:
+     image: postgres:11.1
+     expose:
+       - 5432
+     environment:
+       POSTGRES_PASSWORD: root
+
+   test:
+     volumes:
+       - .:/var/app
+     depends_on:
+       - kafka-broker
+       - mysql
+       - postgres
+     build: .
+     environment:
+       - "DEFAULT_TIMEOUT=${DEFAULT_TIMEOUT}"
+       - MYSQL_HOST=mysql
+       - PG_HOST=postgres
+       - SCHEMA_REGISTRY=http://schema-registry:8081
+       - KAFKA_SEED_BROKER=kafka-broker:9092
+     command: dockerize -wait tcp://mysql:3306 -wait tcp://postgres:5432 -timeout 1m rspec
+
+   zookeeper:
+     image: wurstmeister/zookeeper:latest
+     ports:
+       - 2181:2181
+
+   schema-registry:
+     image: confluentinc/cp-schema-registry
+     hostname: schema-registry
+     depends_on:
+       - zookeeper
+       - kafka-broker
+     ports:
+       - "8081:8081"
+     environment:
+       SCHEMA_REGISTRY_HOST_NAME: schema-registry
+       SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: 'zookeeper:2181'
+
+   kafka-broker:
+     image: confluentinc/cp-enterprise-kafka
+     hostname: kafka-broker
+     depends_on:
+       - zookeeper
+     ports:
+       - "9092:9092"
+     environment:
+       KAFKA_BROKER_ID: 1
+       KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
+       KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://kafka-broker:9092'
+       KAFKA_METRIC_REPORTERS: io.confluent.metrics.reporter.ConfluentMetricsReporter
+       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+       KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
+       CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: kafka-broker:9092
+       CONFLUENT_METRICS_REPORTER_ZOOKEEPER_CONNECT: zookeeper:2181
+       CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: 1
+       CONFLUENT_METRICS_ENABLE: 'true'
+       CONFLUENT_SUPPORT_CUSTOMER_ID: 'anonymous'
data/docs/DATABASE_BACKEND.md ADDED
@@ -0,0 +1,147 @@
+ # Database Backend Design
+
+ Kafka is a messaging protocol, while databases are transactional and relational.
+ Marrying the two (e.g. by using Kafka to publish changes to a database table)
+ is not a simple task. This document describes the problem and the current
+ implementation. There will be references to microservices architecture as that
+ informs some of the discussion.
+
+ ## A Pure Solution
+
+ The purest solution is to use Kafka as the "source of truth" by first publishing
+ all messages synchronously to Kafka, and then using a consumer to read these
+ messages back into the database. If there are any errors sending to Kafka,
+ the thread will crash and no data will be written. If there are errors
+ reading the data back into the database, the data remains in Kafka and can
+ be re-read at any time.
+
+ There are several real-world problems with this pure solution:
+
+ 1. The solution assumes that creating a consumer is a simple task, which is
+    definitely not the case. Depending on how many topics are being produced to,
+    a separate consumer thread per topic (which is how Phobos works) is overkill.
+    The other option is to introduce an entirely new consumer service to handle
+    consuming the topics we've already produced, which is even more overkill.
+ 2. For CRUD interfaces, or any other UI that saves data to the database, a
+    common use case is to save the data and serve it back to the user in a
+    single API call. Requiring the data to round-trip through Kafka
+    asynchronously before it can be saved would add a large amount of
+    complexity and may force the user to wait unnecessarily.
+ 3. We want to make use of database transactions - i.e. if an error happens
+    saving one record, the others should roll back. Once a message is written to
+    Kafka, it can't be "rolled back" easily. Kafka transactions do exist, but
+    they are not widely supported in Kafka clients, and we would still be
+    faced with the fact that one transaction (Kafka or database) could finish
+    and then the process could be killed before the other one completes.
+ 4. We want to make use of auto-increment database IDs - we can't do this if we
+    write to Kafka first.
+ 5. Kafka is an external dependency. If either the DB **or** Kafka goes down,
+    our app becomes unusable.
+
+ Tools like Kafka Connect and Debezium are also not ideal because:
+
+ 1. They are tied very closely to the internal relational schema, which is
+    problematic, especially for legacy systems: it makes internal changes
+    nearly impossible.
+ 2. They are separate services, and connectors must be created for each
+    microservice separately, which is a large overhead.
+
+ ## Database Backend Solution
+
+ We will use the database itself as the source of our Kafka messages.
+ We first write our messages to a database table and then asynchronously
+ send those messages to Kafka. This solves our problems:
+
+ 1. The database is the (interim) source of truth. The Kafka message log is
+    essentially the changelog, which we can tail and send out. If our producing
+    thread errors out, a new one will simply pick up where it left off.
+    This ensures eventual consistency.
+ 2. Because we are only using the database in the main application thread, we do
+    not need to wait for Kafka production and can return immediately.
+ 3. Because we are only saving to the database, we can use transactions normally
+    - if a transaction fails, it will roll back along with any Kafka messages we
+    intended to send.
+ 4. Records are saved normally and messages are created after that, all as part
+    of the transaction, so we can use database IDs as usual.
+ 5. We remove Kafka entirely as a dependency for normal work - the Kafka sending
+    piece is a separate thread.
+
+ The one downside is a slight delay (generally less than 1 second)
+ between the message being written to the database and sent to Kafka - in most
+ cases this is an acceptable limitation.
+
+ ### The Implementation
+
+ The database backend consists of two tables (sketched in the migration below):
+
+ * `kafka_messages` - this keeps track of the messages that were "published",
+   including the payload, topic, key and partition key. These messages
+   are *raw data* - all processing, including Avro encoding, must happen
+   upstream before they are inserted.
+ * `kafka_topic_info` - this table is essentially a lock table used to ensure
+   that only one producer thread is ever "working" on a topic at a time.
+
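+ A minimal migration sketch for these tables (column names and types are
+ inferred from the description above; the generator under
+ `lib/generators/deimos/db_backend_generator.rb` is the source of truth):
+
+ ```ruby
+ class CreateDbBackend < ActiveRecord::Migration[5.2]
+   def change
+     create_table :kafka_messages do |t|
+       t.string :topic, null: false
+       t.binary :message            # raw, already-encoded payload
+       t.binary :key
+       t.string :partition_key
+       t.timestamps
+     end
+
+     create_table :kafka_topic_info do |t|
+       t.string :topic, null: false
+       t.string :locked_by          # UUID of the producer thread holding the lock
+       t.datetime :locked_at
+       t.boolean :error, null: false, default: false
+       t.integer :retries, null: false, default: 0
+       t.timestamps
+     end
+     add_index :kafka_topic_info, :topic, unique: true
+   end
+ end
+ ```
+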
+ The backend code structure is such that when a producer calls `publish_list`,
+ it delegates that logic to the configured backend. A backend of `kafka`
+ or `kafka_async` will use existing Phobos logic. A backend of `db` will use
+ the database backend instead.
+
+ ### "Publishing" A Message
+
+ When `publish_list` is called with the database backend configured,
+ Deimos will instead save the message to the `kafka_messages` table.
+
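+ A hedged sketch of what this looks like from application code (the
+ `publish_backend` config key and `MyProducer` are illustrative here):
+
+ ```ruby
+ # config/initializers/deimos.rb
+ Deimos.configure do |config|
+   config.publish_backend = :db # messages go to kafka_messages, not Kafka
+ end
+
+ # Publishing now just inserts rows, inside the caller's transaction:
+ MyProducer.publish_list([{ 'test_id' => 'abc', 'some_int' => 3 }])
+ ```
+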
+ ### Sending Messages to Kafka
+
+ The database executor is started by calling `Deimos.start_db_backend!`
+ with a specified number of threads. These threads will continually scan the
+ `kafka_messages` table and send the messages to Kafka.
+
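+ For example (a sketch, typically run in its own process alongside the app;
+ the exact keyword for the thread count is an assumption):
+
+ ```ruby
+ # e.g. from a rake task or a dedicated script
+ Deimos.start_db_backend!(thread_count: 2)
+ ```
+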
+ The algorithm for sending the messages makes use of the `kafka_topic_info`
+ table as a lock table. There is also an `error` boolean column which is used
+ to track when a topic has errored out. When this happens, the topic is marked
+ as errored and will not be picked up for the next minute, after which it will
+ be treated as any other topic. The full algorithm is as follows (a condensed
+ Ruby sketch follows the list):
+
+ * Create a UUID for the thread - this is created once on thread start.
+ * Find all unique topics in the `kafka_messages` table.
+ * For each topic:
+   * Create an entry in `kafka_topic_info` for this topic if it doesn't exist.
+   * Run the following query:
+
+ ```sql
+ UPDATE kafka_topic_info
+ SET locked_by=#{uuid}, locked_at=NOW(), error=0
+ WHERE topic=#{topic}
+   AND ((locked_by IS NULL AND error=0) OR locked_at < #{1.minute.ago})
+ LIMIT 1
+ ```
+
+   * If the lock was unsuccessful, move on to the next topic in the list.
+   * Find the first 1000 messages in `kafka_messages` for that topic, ordered
+     by ID (insertion order).
+   * Send the messages synchronously to Kafka, with all brokers acking the message.
+   * Delete the records from the DB.
+   * Update the `locked_at` timestamp in `kafka_topic_info` to `NOW()` to ensure
+     liveness in case a particular batch took longer than expected to send.
+   * If the current batch is 1000 messages, repeat with the next batch of
+     messages until a batch returns fewer than 1000.
+   * When all batches are sent:
+     * Unlock the topic by updating the `kafka_topic_info` for this topic,
+       setting `locked_by=NULL, locked_at=NULL, error=0, retries=0`.
+     * Move on to the next topic.
+   * If there are errors sending a batch:
+     * Update the `kafka_topic_info` for this topic to have
+       `locked_by=NULL, locked_at=NULL, error=1, retries=retries+1` - this will
+       effectively keep it locked for the next minute.
+     * Move on to the next topic.
+ * When all topics are done, or if there are no topics, sleep for 0.5 seconds
+   and begin again.
+
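+ A condensed Ruby sketch of the per-topic loop (the helper names on
+ `KafkaTopicInfo` and the batch handling are illustrative, not the exact
+ `Utils::DbProducer` API):
+
+ ```ruby
+ def process_topic(topic)
+   # Attempt the UPDATE ... LIMIT 1 lock shown above; skip the topic if
+   # another thread holds it.
+   return unless KafkaTopicInfo.lock(topic, @uuid)
+
+   loop do
+     batch = Deimos::KafkaMessage.where(topic: topic).order(:id).limit(1000).to_a
+     break if batch.empty?
+
+     produce_messages(batch) # synchronous send, all brokers acking
+     Deimos::KafkaMessage.where(id: batch.map(&:id)).delete_all
+     KafkaTopicInfo.heartbeat(topic, @uuid) # refresh locked_at for liveness
+     break if batch.size < 1000
+   end
+   KafkaTopicInfo.clear_lock(topic, @uuid) # locked_by=NULL, error=0, retries=0
+ rescue StandardError
+   # error=1, retries+=1 - keeps the topic effectively locked for a minute
+   KafkaTopicInfo.register_error(topic, @uuid)
+ end
+ ```
+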
+ ### Class / Method Design
+
+ The algorithm is split up into the following classes:
+
+ * `Backends::Db` - this is the class that saves the message to the database.
+ * `KafkaMessage` - an ActiveRecord class that handles saving the messages to
+   the database and querying them.
+ * `KafkaTopicInfo` - an ActiveRecord class that handles locking, unlocking
+   and heartbeating.
+ * `Utils::SignalHandler` - the equivalent of Phobos's Runner class; it
+   handles the KILL, INT and TERM signals to gracefully shut down the threads.
+ * `Utils::Executor` - the equivalent of Phobos's Executor class; it handles
+   the thread pool of producer threads.
+ * `Utils::DbProducer` - the producer thread itself, which implements most of
+   the algorithm listed above.
+
+ ### Caveats
+
+ There is one disadvantage to this pattern: it is possible for events to be
+ sent multiple times if the thread that sent the messages dies before deleting
+ them from the database. In general this is acceptable, since Kafka only
+ guarantees at-least-once delivery in any case.
data/docs/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,34 @@
+ # Pull Request Template
+
+ ## Description
+
+ Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
+
+ Fixes # (issue)
+
+ ## Type of change
+
+ Please delete options that are not relevant.
+
+ - [ ] Bug fix (non-breaking change which fixes an issue)
+ - [ ] New feature (non-breaking change which adds functionality)
+ - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+ - [ ] This change requires a documentation update
+
+ ## How Has This Been Tested?
+
+ Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.
+
+ - [ ] Test A
+ - [ ] Test B
+
+ ## Checklist:
+
+ - [ ] My code follows the style guidelines of this project
+ - [ ] I have performed a self-review of my own code
+ - [ ] I have commented my code, particularly in hard-to-understand areas
+ - [ ] I have made corresponding changes to the documentation
+ - [ ] My changes generate no new warnings
+ - [ ] I have added tests that prove my fix is effective or that my feature works
+ - [ ] New and existing unit tests pass locally with my changes
+ - [ ] Any dependent changes have been merged and published in downstream modules
data/lib/deimos/active_record_consumer.rb ADDED
@@ -0,0 +1,81 @@
+ # frozen_string_literal: true
+
+ require 'deimos/consumer'
+
+ module Deimos
+   # Consumer that automatically saves the payload into the database.
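+   #
+   # A minimal usage sketch (`MyConsumer` and `Widget` are illustrative;
+   # `schema`, `namespace` and `key_config` come from `Deimos::SharedConfig`):
+   #
+   #   class MyConsumer < Deimos::ActiveRecordConsumer
+   #     schema 'MySchema'
+   #     namespace 'com.my-namespace'
+   #     key_config field: :test_id
+   #     record_class Widget
+   #   end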
+   class ActiveRecordConsumer < Consumer
+     class << self
+       # @param klass [Class < ActiveRecord::Base] the class used to save to the
+       #   database.
+       def record_class(klass)
+         config[:record_class] = klass
+       end
+     end
+
+     # :nodoc:
+     def consume(payload, metadata)
+       key = metadata.with_indifferent_access[:key]
+       klass = self.class.config[:record_class]
+       record = klass.where(klass.primary_key => key).first
+       if payload.nil?
+         destroy_record(record)
+         return
+       end
+       record ||= klass.new
+       attrs = record_attributes(payload.with_indifferent_access)
+       # don't use attributes= - bypass Rails < 5 attr_protected
+       attrs.each do |k, v|
+         record.send("#{k}=", v)
+       end
+       record[klass.primary_key] = key
+       record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
+       record.updated_at ||= Time.zone.now if record.respond_to?(:updated_at)
+       record.save!
+     end
+
+     # Destroy a record that received a null payload. Override if you need
+     # to do something other than a straight destroy (e.g. mark as archived).
+     # @param record [ActiveRecord::Base]
+     def destroy_record(record)
+       record&.destroy
+     end
+
+     # Override this method (with `super`) if you want to add/change the default
+     # attributes set on the new/existing record.
+     # @param payload [Hash]
+     def record_attributes(payload)
+       klass = self.class.config[:record_class]
+       attributes = {}
+       schema = self.class.decoder.avro_schema
+       schema.fields.each do |field|
+         column = klass.columns.find { |c| c.name == field.name }
+         next if column.nil?
+         next if %w(updated_at created_at).include?(field.name)
+
+         attributes[field.name] = _coerce_field(field, column, payload[field.name])
+       end
+       attributes
+     end
+
+     private
+
+     # @param field [Avro::Schema]
+     # @param column [ActiveRecord::ConnectionAdapters::Column]
+     # @param val [Object]
+     def _coerce_field(field, column, val)
+       return nil if val.nil?
+
+       field_type = field.type.type.to_sym
+       if field_type == :union
+         union_types = field.type.schemas.map { |s| s.type.to_sym }
+         field_type = union_types.find { |t| t != :null }
+       end
+       if column.type == :datetime && %i(int long).include?(field_type)
+         return Time.zone.strptime(val.to_s, '%s')
+       end
+
+       val
+     end
+   end
+ end
data/lib/deimos/active_record_producer.rb ADDED
@@ -0,0 +1,64 @@
+ # frozen_string_literal: true
+
+ require 'deimos/producer'
+
+ module Deimos
+   # Class which automatically produces a record when given an ActiveRecord
+   # instance or a list of them. Just call `send_events` on a list of records
+   # and they will be auto-published. You can override `generate_payload`
+   # to make changes to the payload before it's published.
+   #
+   # You can also call this with a list of hashes representing attributes.
+   # This is common when using activerecord-import.
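+   #
+   # A minimal usage sketch (`MyProducer` and `Widget` are illustrative):
+   #
+   #   class MyProducer < Deimos::ActiveRecordProducer
+   #     topic 'MyTopic'
+   #     schema 'MySchema'
+   #     namespace 'com.my-namespace'
+   #     key_config field: :test_id
+   #     record_class Widget
+   #   end
+   #
+   #   MyProducer.send_events(Widget.last(10))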
+   class ActiveRecordProducer < Producer
+     class << self
+       # Indicate the class this producer is working on.
+       # @param klass [Class]
+       # @param refetch [Boolean] if true, and we are given a hash instead of
+       #   a record object, refetch the record to pass into the `generate_payload`
+       #   method.
+       def record_class(klass, refetch: true)
+         config[:record_class] = klass
+         config[:refetch_record] = refetch
+       end
+
+       # @param record [ActiveRecord::Base]
+       # @param force_send [Boolean]
+       def send_event(record, force_send: false)
+         send_events([record], force_send: force_send)
+       end
+
+       # @param records [Array<ActiveRecord::Base>]
+       # @param force_send [Boolean]
+       def send_events(records, force_send: false)
+         primary_key = config[:record_class]&.primary_key
+         messages = records.map do |record|
+           if record.respond_to?(:attributes)
+             attrs = record.attributes.with_indifferent_access
+           else
+             attrs = record.with_indifferent_access
+             if config[:refetch_record] && attrs[primary_key]
+               record = config[:record_class].find(attrs[primary_key])
+             end
+           end
+           generate_payload(attrs, record).with_indifferent_access
+         end
+         self.publish_list(messages, force_send: force_send)
+       end
+
+       # Generate the payload, given a list of attributes or a record.
+       # Can be overridden or added to by subclasses.
+       # @param attributes [Hash]
+       # @param _record [ActiveRecord::Base] May be nil if refetch_record
+       #   is not set.
+       # @return [Hash]
+       def generate_payload(attributes, _record)
+         schema = self.encoder.avro_schema
+         payload = attributes.stringify_keys
+         payload.delete_if do |k, _|
+           k.to_sym != :payload_key && !schema.fields.find { |f| f.name == k }
+         end
+       end
+     end
+   end
+ end
data/lib/deimos/avro_data_coder.rb ADDED
@@ -0,0 +1,89 @@
+ # frozen_string_literal: true
+
+ module Deimos
+   # Base class for the encoder / decoder classes.
+   class AvroDataCoder
+     attr_accessor :schema, :namespace, :config, :schema_store
+
+     # @param schema [String]
+     # @param namespace [String]
+     # @param schema_store [AvroTurf::SchemaStore]
+     def initialize(schema:, namespace:, schema_store: nil)
+       @schema = schema
+       @namespace = namespace
+       @schema_store = schema_store ||
+                       AvroTurf::SchemaStore.new(path: Deimos.config.schema_path)
+     end
+
+     # @param schema [String]
+     # @return [Avro::Schema]
+     def avro_schema(schema=nil)
+       schema ||= @schema
+       @schema_store.find(schema, @namespace)
+     end
+
+     private
+
+     # @return [AvroTurf]
+     def avro_turf
+       @avro_turf ||= AvroTurf.new(
+         schemas_path: Deimos.config.schema_path,
+         schema_store: @schema_store
+       )
+       @avro_turf
+     end
+
+     # @return [AvroTurf::Messaging]
+     def avro_turf_messaging
+       @avro_turf_messaging ||= AvroTurf::Messaging.new(
+         schema_store: @schema_store,
+         registry_url: Deimos.config.schema_registry_url,
+         schemas_path: Deimos.config.schema_path,
+         namespace: @namespace
+       )
+     end
+
+     # Generate a key schema from the given value schema and key ID. This
+     # is used when encoding or decoding keys from an existing value schema.
+     # @param key_id [Symbol]
+     # @return [Hash]
+     def _generate_key_schema(key_id)
+       return @key_schema if @key_schema
+
+       value_schema = @schema_store.find(@schema, @namespace)
+       key_field = value_schema.fields.find { |f| f.name == key_id.to_s }
+       name = _key_schema_name(@schema)
+       @key_schema = {
+         'type' => 'record',
+         'name' => name,
+         'namespace' => @namespace,
+         'doc' => "Key for #{@namespace}.#{@schema}",
+         'fields' => [
+           {
+             'name' => key_id,
+             'type' => key_field.type.type_sym.to_s
+           }
+         ]
+       }
+       @schema_store.add_schema(@key_schema)
+       @key_schema
+     end
+
+     # @param value_schema [Hash]
+     # @return [String]
+     def _field_name_from_schema(value_schema)
+       raise "Schema #{@schema} not found!" if value_schema.nil?
+       if value_schema['fields'].nil? || value_schema['fields'].empty?
+         raise "Schema #{@schema} has no fields!"
+       end
+
+       value_schema['fields'][0]['name']
+     end
+
+     # @param schema [String]
+     # @return [String]
+     def _key_schema_name(schema)
+       "#{schema.gsub('-value', '')}_key"
+     end
+   end
+ end
data/lib/deimos/avro_data_decoder.rb ADDED
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ require 'avro_turf/messaging'
+ require 'deimos/avro_data_coder'
+
+ module Deimos
+   # Service Object to decode Avro messages.
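+   # A usage sketch (the schema and namespace values are illustrative):
+   #
+   #   decoder = Deimos::AvroDataDecoder.new(schema: 'MySchema',
+   #                                         namespace: 'com.my-namespace')
+   #   decoder.decode(encoded_payload) # => Hash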
+   class AvroDataDecoder < AvroDataCoder
+     # Decode some data.
+     # @param payload [Hash|String]
+     # @param schema [String]
+     # @return [Hash]
+     def decode(payload, schema: nil)
+       schema ||= @schema
+       avro_turf_messaging.decode(payload, schema_name: schema)
+     end
+
+     # Decode against a local schema.
+     # @param payload [Hash]
+     # @param schema [String]
+     # @return [Hash]
+     def decode_local(payload, schema: nil)
+       schema ||= @schema
+       avro_turf.decode(payload, schema_name: schema, namespace: @namespace)
+     end
+
+     # @param payload [String] the encoded key.
+     # @param key_id [String|Symbol]
+     # @return [Object] the decoded key (int/long/string).
+     def decode_key(payload, key_id)
+       key_schema = _generate_key_schema(key_id)
+       field_name = _field_name_from_schema(key_schema)
+       decode(payload, schema: key_schema['name'])[field_name]
+     end
+   end
+ end
data/lib/deimos/avro_data_encoder.rb ADDED
@@ -0,0 +1,51 @@
+ # frozen_string_literal: true
+
+ require 'avro_turf/messaging'
+ require 'deimos/avro_data_coder'
+
+ module Deimos
+   # Service Object to encode Avro messages.
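+   # A usage sketch (mirrors the decoder above; names are illustrative):
+   #
+   #   encoder = Deimos::AvroDataEncoder.new(schema: 'MySchema',
+   #                                         namespace: 'com.my-namespace')
+   #   encoder.encode({ 'test_id' => 'abc' }, topic: 'my-topic') # => String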
+   class AvroDataEncoder < AvroDataCoder
+     # @param payload [Hash]
+     # @param schema [String]
+     # @return [String]
+     def encode_local(payload, schema: nil)
+       schema ||= @schema
+       Avro::SchemaValidator.validate!(avro_schema(schema), payload,
+                                       recursive: true,
+                                       fail_on_extra_fields: true)
+       avro_turf.encode(payload, schema_name: schema, namespace: @namespace)
+     rescue Avro::IO::AvroTypeError
+       # throw a more detailed error
+       value_schema = @schema_store.find(schema, @namespace)
+       Avro::SchemaValidator.validate!(value_schema, payload)
+     end
+
+     # @param payload [Hash]
+     # @param schema [String]
+     # @param topic [String]
+     # @return [String]
+     def encode(payload, schema: nil, topic: nil)
+       schema ||= @schema
+       Avro::SchemaValidator.validate!(avro_schema(schema), payload,
+                                       recursive: true,
+                                       fail_on_extra_fields: true)
+       avro_turf_messaging.encode(payload, schema_name: schema, subject: topic)
+     rescue Avro::IO::AvroTypeError
+       # throw a more detailed error
+       schema = @schema_store.find(@schema, @namespace)
+       Avro::SchemaValidator.validate!(schema, payload)
+     end
+
+     # @param key_id [Symbol|String]
+     # @param key [Object]
+     # @param topic [String]
+     # @return [String] the encoded key.
+     def encode_key(key_id, key, topic=nil)
+       key_schema = _generate_key_schema(key_id)
+       field_name = _field_name_from_schema(key_schema)
+       payload = { field_name => key }
+       encode(payload, schema: key_schema['name'], topic: topic)
+     end
+   end
+ end
data/lib/deimos/backends/db.rb ADDED
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ require 'deimos/kafka_message'
+
+ module Deimos
+   module Backends
+     # Backend which saves messages to the database instead of immediately
+     # sending them.
+     class Db < Deimos::PublishBackend
+       class << self
+         # :nodoc:
+         def execute(producer_class:, messages:)
+           records = messages.map do |m|
+             message = Deimos::KafkaMessage.new(
+               message: m.encoded_payload.to_s.b,
+               topic: m.topic,
+               partition_key: m.partition_key || m.key
+             )
+             message.key = m.encoded_key.to_s.b unless producer_class.config[:no_keys]
+             message
+           end
+           Deimos::KafkaMessage.import(records)
+         end
+       end
+     end
+   end
+ end