deimos-kafka 1.0.0.pre.beta15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +9 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +742 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos.rb +134 -0
  24. data/lib/deimos/active_record_consumer.rb +81 -0
  25. data/lib/deimos/active_record_producer.rb +64 -0
  26. data/lib/deimos/avro_data_coder.rb +89 -0
  27. data/lib/deimos/avro_data_decoder.rb +36 -0
  28. data/lib/deimos/avro_data_encoder.rb +51 -0
  29. data/lib/deimos/backends/db.rb +27 -0
  30. data/lib/deimos/backends/kafka.rb +27 -0
  31. data/lib/deimos/backends/kafka_async.rb +27 -0
  32. data/lib/deimos/configuration.rb +88 -0
  33. data/lib/deimos/consumer.rb +164 -0
  34. data/lib/deimos/instrumentation.rb +71 -0
  35. data/lib/deimos/kafka_message.rb +27 -0
  36. data/lib/deimos/kafka_source.rb +126 -0
  37. data/lib/deimos/kafka_topic_info.rb +79 -0
  38. data/lib/deimos/message.rb +74 -0
  39. data/lib/deimos/metrics/datadog.rb +47 -0
  40. data/lib/deimos/metrics/mock.rb +39 -0
  41. data/lib/deimos/metrics/provider.rb +38 -0
  42. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  43. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  44. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  45. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  46. data/lib/deimos/producer.rb +218 -0
  47. data/lib/deimos/publish_backend.rb +30 -0
  48. data/lib/deimos/railtie.rb +8 -0
  49. data/lib/deimos/schema_coercer.rb +108 -0
  50. data/lib/deimos/shared_config.rb +59 -0
  51. data/lib/deimos/test_helpers.rb +356 -0
  52. data/lib/deimos/tracing/datadog.rb +35 -0
  53. data/lib/deimos/tracing/mock.rb +40 -0
  54. data/lib/deimos/tracing/provider.rb +31 -0
  55. data/lib/deimos/utils/db_producer.rb +95 -0
  56. data/lib/deimos/utils/executor.rb +117 -0
  57. data/lib/deimos/utils/inline_consumer.rb +144 -0
  58. data/lib/deimos/utils/lag_reporter.rb +182 -0
  59. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  60. data/lib/deimos/utils/signal_handler.rb +68 -0
  61. data/lib/deimos/version.rb +5 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +17 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +117 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +208 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
data/lib/deimos/active_record_consumer.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+require 'deimos/consumer'
+
+module Deimos
+  # Consumer that automatically saves the payload into the database.
+  class ActiveRecordConsumer < Consumer
+    class << self
+      # @param klass [Class < ActiveRecord::Base] the class used to save to the
+      # database.
+      def record_class(klass)
+        config[:record_class] = klass
+      end
+    end
+
+    # :nodoc:
+    def consume(payload, metadata)
+      key = metadata.with_indifferent_access[:key]
+      klass = self.class.config[:record_class]
+      record = klass.where(klass.primary_key => key).first
+      if payload.nil?
+        destroy_record(record)
+        return
+      end
+      record ||= klass.new
+      attrs = record_attributes(payload.with_indifferent_access)
+      # don't use attributes= - bypass Rails < 5 attr_protected
+      attrs.each do |k, v|
+        record.send("#{k}=", v)
+      end
+      record[klass.primary_key] = key
+      record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
+      record.updated_at ||= Time.zone.now if record.respond_to?(:updated_at)
+      record.save!
+    end
+
+    # Destroy a record that received a null payload. Override if you need
+    # to do something other than a straight destroy (e.g. mark as archived).
+    # @param record [ActiveRecord::Base]
+    def destroy_record(record)
+      record&.destroy
+    end
+
+    # Override this method (with `super`) if you want to add/change the default
+    # attributes set on the new/existing record.
+    # @param payload [Hash]
+    def record_attributes(payload)
+      klass = self.class.config[:record_class]
+      attributes = {}
+      schema = self.class.decoder.avro_schema
+      schema.fields.each do |field|
+        column = klass.columns.find { |c| c.name == field.name }
+        next if column.nil?
+        next if %w(updated_at created_at).include?(field.name)
+
+        attributes[field.name] = _coerce_field(field, column, payload[field.name])
+      end
+      attributes
+    end
+
+    private
+
+    # @param field [Avro::Schema]
+    # @param column [ActiveRecord::ConnectionAdapters::Column]
+    # @param val [Object]
+    def _coerce_field(field, column, val)
+      return nil if val.nil?
+
+      field_type = field.type.type.to_sym
+      if field_type == :union
+        union_types = field.type.schemas.map { |s| s.type.to_sym }
+        field_type = union_types.find { |t| t != :null }
+      end
+      if column.type == :datetime && %i(int long).include?(field_type)
+        return Time.zone.strptime(val.to_s, '%s')
+      end
+
+      val
+    end
+  end
+end
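For context, a minimal subclass looks like the sketch below. It assumes the class-level `schema`/`namespace` macros from data/lib/deimos/shared_config.rb; `Widget` and the field names are hypothetical, not part of this diff.

class MyConsumer < Deimos::ActiveRecordConsumer
  schema 'MySchema'
  namespace 'com.my-namespace'
  record_class Widget # hypothetical ActiveRecord model

  # Optionally reshape attributes before they are saved.
  def record_attributes(payload)
    super.merge('name' => payload[:name].to_s.strip)
  end
end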
data/lib/deimos/active_record_producer.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require 'deimos/producer'
+
+module Deimos
+  # Class which automatically produces a message when given an ActiveRecord
+  # instance or a list of them. Just call `send_events` on a list of records
+  # and they will be auto-published. You can override `generate_payload`
+  # to make changes to the payload before it's published.
+  #
+  # You can also call this with a list of hashes representing attributes.
+  # This is common when using activerecord-import.
+  class ActiveRecordProducer < Producer
+    class << self
+      # Indicate the class this producer is working on.
+      # @param klass [Class]
+      # @param refetch [Boolean] if true, and we are given a hash instead of
+      # a record object, refetch the record to pass into the `generate_payload`
+      # method.
+      def record_class(klass, refetch: true)
+        config[:record_class] = klass
+        config[:refetch_record] = refetch
+      end
+
+      # @param record [ActiveRecord::Base]
+      # @param force_send [Boolean]
+      def send_event(record, force_send: false)
+        send_events([record], force_send: force_send)
+      end
+
+      # @param records [Array<ActiveRecord::Base>]
+      # @param force_send [Boolean]
+      def send_events(records, force_send: false)
+        primary_key = config[:record_class]&.primary_key
+        messages = records.map do |record|
+          if record.respond_to?(:attributes)
+            attrs = record.attributes.with_indifferent_access
+          else
+            attrs = record.with_indifferent_access
+            if config[:refetch_record] && attrs[primary_key]
+              record = config[:record_class].find(attrs[primary_key])
+            end
+          end
+          generate_payload(attrs, record).with_indifferent_access
+        end
+        self.publish_list(messages, force_send: force_send)
+      end
+
+      # Generate the payload, given a hash of attributes or a record.
+      # Can be overridden or added to by subclasses.
+      # @param attributes [Hash]
+      # @param _record [ActiveRecord::Base] May be nil if refetch_record
+      # is not set.
+      # @return [Hash]
+      def generate_payload(attributes, _record)
+        schema = self.encoder.avro_schema
+        payload = attributes.stringify_keys
+        payload.delete_if do |k, _|
+          k.to_sym != :payload_key && !schema.fields.find { |f| f.name == k }
+        end
+      end
+    end
+  end
+end
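A representative subclass, as a sketch only: the `topic`/`schema`/`namespace` macros are assumed from data/lib/deimos/shared_config.rb, and `Widget`, the topic, and the extra field are illustrative.

class WidgetProducer < Deimos::ActiveRecordProducer
  topic 'widgets'
  schema 'MySchemaWithId'
  namespace 'com.my-namespace'
  record_class Widget # hypothetical model

  # Add fields that don't live on the record itself.
  def self.generate_payload(attrs, _widget)
    super.merge('message_id' => SecureRandom.uuid)
  end
end

# Publish a single record, or a batch:
WidgetProducer.send_event(widget)
WidgetProducer.send_events(Widget.where(published: false))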
data/lib/deimos/avro_data_coder.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Base class for the encoder / decoder classes.
+  class AvroDataCoder
+    attr_accessor :schema, :namespace, :config, :schema_store
+
+    # @param schema [String]
+    # @param namespace [String]
+    # @param schema_store [AvroTurf::SchemaStore]
+    def initialize(schema:, namespace:, schema_store: nil)
+      @schema = schema
+      @namespace = namespace
+      @schema_store = schema_store ||
+                      AvroTurf::SchemaStore.new(path: Deimos.config.schema_path)
+    end
+
+    # @param schema [String]
+    # @return [Avro::Schema]
+    def avro_schema(schema=nil)
+      schema ||= @schema
+      @schema_store.find(schema, @namespace)
+    end
+
+    private
+
+    # @return [AvroTurf]
+    def avro_turf
+      @avro_turf ||= AvroTurf.new(
+        schemas_path: Deimos.config.schema_path,
+        schema_store: @schema_store
+      )
+      @avro_turf
+    end
+
+    # @return [AvroTurf::Messaging]
+    def avro_turf_messaging
+      @avro_turf_messaging ||= AvroTurf::Messaging.new(
+        schema_store: @schema_store,
+        registry_url: Deimos.config.schema_registry_url,
+        schemas_path: Deimos.config.schema_path,
+        namespace: @namespace
+      )
+    end
+
+    # Generate a key schema from the given value schema and key ID. This
+    # is used when encoding or decoding keys from an existing value schema.
+    # @param key_id [Symbol]
+    # @return [Hash]
+    def _generate_key_schema(key_id)
+      return @key_schema if @key_schema
+
+      value_schema = @schema_store.find(@schema, @namespace)
+      key_field = value_schema.fields.find { |f| f.name == key_id.to_s }
+      name = _key_schema_name(@schema)
+      @key_schema = {
+        'type' => 'record',
+        'name' => name,
+        'namespace' => @namespace,
+        'doc' => "Key for #{@namespace}.#{@schema}",
+        'fields' => [
+          {
+            'name' => key_id,
+            'type' => key_field.type.type_sym.to_s
+          }
+        ]
+      }
+      @schema_store.add_schema(@key_schema)
+      @key_schema
+    end
+
+    # @param value_schema [Hash]
+    # @return [String]
+    def _field_name_from_schema(value_schema)
+      raise "Schema #{@schema} not found!" if value_schema.nil?
+      if value_schema['fields'].nil? || value_schema['fields'].empty?
+        raise "Schema #{@schema} has no fields!"
+      end
+
+      value_schema['fields'][0]['name']
+    end
+
+    # @param schema [String]
+    # @return [String]
+    def _key_schema_name(schema)
+      "#{schema.gsub('-value', '')}_key"
+    end
+  end
+end
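To make the key-schema generation concrete: for a value schema `MySchema` in namespace `com.my-namespace` with a string field `test_id` (the field name is illustrative), `_generate_key_schema(:test_id)` registers and returns a hash along these lines, derived directly from the code above:

{
  'type' => 'record',
  'name' => 'MySchema_key', # via _key_schema_name
  'namespace' => 'com.my-namespace',
  'doc' => 'Key for com.my-namespace.MySchema',
  'fields' => [
    { 'name' => :test_id, 'type' => 'string' } # key_id is passed through as a Symbol
  ]
}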
data/lib/deimos/avro_data_decoder.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+require 'avro_turf/messaging'
+require 'deimos/avro_data_coder'
+
+module Deimos
+  # Service Object to decode Avro messages.
+  class AvroDataDecoder < AvroDataCoder
+    # Decode some data.
+    # @param payload [Hash|String]
+    # @param schema [String]
+    # @return [Hash]
+    def decode(payload, schema: nil)
+      schema ||= @schema
+      avro_turf_messaging.decode(payload, schema_name: schema)
+    end
+
+    # Decode against a local schema.
+    # @param payload [Hash]
+    # @param schema [String]
+    # @return [Hash]
+    def decode_local(payload, schema: nil)
+      schema ||= @schema
+      avro_turf.decode(payload, schema_name: schema, namespace: @namespace)
+    end
+
+    # @param payload [String] the encoded key.
+    # @param key_id [String|Symbol]
+    # @return [Object] the decoded key (int/long/string).
+    def decode_key(payload, key_id)
+      key_schema = _generate_key_schema(key_id)
+      field_name = _field_name_from_schema(key_schema)
+      decode(payload, schema: key_schema['name'])[field_name]
+    end
+  end
+end
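A sketch of decoding a consumed message by hand. It assumes `schema_registry_url` and `schema_path` are configured as in data/lib/deimos/configuration.rb, and that `message` is a raw Kafka message with `key`/`value` in the Confluent wire format; the schema and field names are illustrative.

decoder = Deimos::AvroDataDecoder.new(schema: 'MySchema',
                                      namespace: 'com.my-namespace')
payload = decoder.decode(message.value)             # => Hash of decoded fields
key     = decoder.decode_key(message.key, :test_id) # => e.g. "abc123"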
data/lib/deimos/avro_data_encoder.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+require 'avro_turf/messaging'
+require 'deimos/avro_data_coder'
+
+module Deimos
+  # Service Object to encode Avro messages.
+  class AvroDataEncoder < AvroDataCoder
+    # @param payload [Hash]
+    # @param schema [String]
+    # @return [String]
+    def encode_local(payload, schema: nil)
+      schema ||= @schema
+      Avro::SchemaValidator.validate!(avro_schema(schema), payload,
+                                      recursive: true,
+                                      fail_on_extra_fields: true)
+      avro_turf.encode(payload, schema_name: schema, namespace: @namespace)
+    rescue Avro::IO::AvroTypeError
+      # raise a more detailed error
+      value_schema = @schema_store.find(schema, @namespace)
+      Avro::SchemaValidator.validate!(value_schema, payload)
+    end
+
+    # @param payload [Hash]
+    # @param schema [String]
+    # @param topic [String]
+    # @return [String]
+    def encode(payload, schema: nil, topic: nil)
+      schema ||= @schema
+      Avro::SchemaValidator.validate!(avro_schema(schema), payload,
+                                      recursive: true,
+                                      fail_on_extra_fields: true)
+      avro_turf_messaging.encode(payload, schema_name: schema, subject: topic)
+    rescue Avro::IO::AvroTypeError
+      # raise a more detailed error
+      value_schema = @schema_store.find(schema, @namespace)
+      Avro::SchemaValidator.validate!(value_schema, payload)
+    end
+
+    # @param key_id [Symbol|String]
+    # @param key [Object]
+    # @param topic [String]
+    # @return [String] the encoded key.
+    def encode_key(key_id, key, topic=nil)
+      key_schema = _generate_key_schema(key_id)
+      field_name = _field_name_from_schema(key_schema)
+      payload = { field_name => key }
+      encode(payload, schema: key_schema['name'], topic: topic)
+    end
+  end
+end
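The mirror image of the decoder sketch above, with the same caveats (configured registry/schema path; illustrative schema and field names). Validation raises on extra or mistyped fields before anything is sent to the registry.

encoder = Deimos::AvroDataEncoder.new(schema: 'MySchema',
                                      namespace: 'com.my-namespace')
value = encoder.encode({ 'test_id' => 'abc123', 'some_int' => 3 },
                       topic: 'my-topic')              # registry-encoded value bytes
key   = encoder.encode_key(:test_id, 'abc123', 'my-topic-key') # encoded key bytes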
data/lib/deimos/backends/db.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'deimos/kafka_message'
+
+module Deimos
+  module Backends
+    # Backend which saves messages to the database instead of immediately
+    # sending them.
+    class Db < Deimos::PublishBackend
+      class << self
+        # :nodoc:
+        def execute(producer_class:, messages:)
+          records = messages.map do |m|
+            message = Deimos::KafkaMessage.new(
+              message: m.encoded_payload.to_s.b,
+              topic: m.topic,
+              partition_key: m.partition_key || m.key
+            )
+            message.key = m.encoded_key.to_s.b unless producer_class.config[:no_keys]
+            message
+          end
+          Deimos::KafkaMessage.import(records)
+        end
+      end
+    end
+  end
+end
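Switching to this backend is a one-line config change; a separate process (see data/lib/deimos/utils/db_producer.rb) then drains the saved rows to Kafka. A sketch, assuming the `Deimos.configure` helper defined in data/lib/deimos.rb:

# config/initializers/deimos.rb
Deimos.configure do |config|
  config.publish_backend = :db # producers now write rows via KafkaMessage.import
end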
data/lib/deimos/backends/kafka.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Default backend to produce to Kafka.
+    class Kafka < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
data/lib/deimos/backends/kafka_async.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Deimos
+  module Backends
+    # Backend which produces to Kafka via an async producer.
+    class KafkaAsync < Deimos::PublishBackend
+      include Phobos::Producer
+
+      # :nodoc:
+      def self.execute(producer_class:, messages:)
+        Deimos.instrument(
+          'produce',
+          producer: producer_class,
+          topic: producer_class.topic,
+          payloads: messages.map(&:payload)
+        ) do
+          producer.async_publish_list(messages.map(&:encoded_hash))
+          Deimos.config.metrics&.increment(
+            'publish',
+            tags: %W(status:success topic:#{producer_class.topic}),
+            by: messages.size
+          )
+        end
+      end
+    end
+  end
+end
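The only difference from the sync backend above is `async_publish_list`, which buffers in Phobos's async producer instead of blocking the caller. Per the guidance in configuration.rb below, one pattern is to pick the backend by process type; Sidekiq here is just an example worker framework, not something this gem depends on.

Deimos.configure do |config|
  # async in the user-facing app; sync where blocking is acceptable
  config.publish_backend =
    defined?(Sidekiq) && Sidekiq.server? ? :kafka : :kafka_async
end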
data/lib/deimos/configuration.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Deimos
+  # Class to hold configuration.
+  class Configuration
+    # @return [Logger]
+    attr_accessor :logger
+    attr_accessor :phobos_logger
+    attr_accessor :kafka_logger
+
+    # By default, consumer errors will be consumed and logged to
+    # the metrics provider.
+    # Set this to true to force the error to be raised.
+    # @return [Boolean]
+    attr_accessor :reraise_consumer_errors
+
+    # @return [String]
+    attr_accessor :schema_registry_url
+
+    # @return [String]
+    attr_accessor :seed_broker
+
+    # Local path to schemas.
+    # @return [String]
+    attr_accessor :schema_path
+
+    # Default namespace for all producers. Can remain nil. Individual
+    # producers can override.
+    # @return [String]
+    attr_accessor :producer_schema_namespace
+
+    # Add a prefix to all topic names. This can be useful if you're using
+    # the same Kafka broker for different environments that are producing
+    # the same topics.
+    # @return [String]
+    attr_accessor :producer_topic_prefix
+
+    # Disable all actual message producing. Useful when doing things like
+    # mass imports or data space management when events don't need to be
+    # fired.
+    # @return [Boolean]
+    attr_accessor :disable_producers
+
+    # File path to the Phobos configuration file, relative to the application root.
+    # @return [String]
+    attr_accessor :phobos_config_file
+
+    # @return [Boolean]
+    attr_accessor :ssl_enabled
+
+    # @return [String]
+    attr_accessor :ssl_ca_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert
+
+    # @return [String]
+    attr_accessor :ssl_client_cert_key
+
+    # Currently can be set to :db, :kafka, or :kafka_async. If using Kafka
+    # directly, set to async in your user-facing app, and sync in your
+    # consumers or delayed workers.
+    # @return [Symbol]
+    attr_accessor :publish_backend
+
+    # @return [Boolean]
+    attr_accessor :report_lag
+
+    # @return [Metrics::Provider]
+    attr_accessor :metrics
+
+    # @return [Tracing::Provider]
+    attr_accessor :tracer
+
+    # :nodoc:
+    def initialize
+      @phobos_config_file = 'config/phobos.yml'
+      @publish_backend = :kafka_async
+    end
+
+    # @param other_config [Configuration]
+    # @return [Boolean]
+    def phobos_config_changed?(other_config)
+      phobos_keys = %w(phobos_config_file ssl_ca_cert ssl_client_cert ssl_client_cert_key)
+      phobos_keys.any? { |key| self.send(key) != other_config.send(key) }
+    end
+  end
+end
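Pulling these settings together, a representative Rails initializer might look like the sketch below. All values are illustrative, and `Deimos.configure` is assumed from data/lib/deimos.rb.

# config/initializers/deimos.rb
Deimos.configure do |config|
  config.logger = Rails.logger
  config.schema_registry_url = ENV.fetch('SCHEMA_REGISTRY_URL', 'http://localhost:8081')
  config.seed_broker = ENV.fetch('KAFKA_SEED_BROKER', 'localhost:9092')
  config.schema_path = Rails.root.join('app', 'schemas').to_s
  config.producer_schema_namespace = 'com.my-namespace'
  config.producer_topic_prefix = "#{Rails.env}." # avoid topic collisions between environments
  config.publish_backend = :kafka_async
  config.reraise_consumer_errors = Rails.env.test?
  config.metrics = Deimos::Metrics::Mock.new # swap for Metrics::Datadog in production
end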