deimos-kafka 1.0.0.pre.beta15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +74 -0
  3. data/.gitignore +41 -0
  4. data/.gitmodules +0 -0
  5. data/.rspec +1 -0
  6. data/.rubocop.yml +321 -0
  7. data/.ruby-gemset +1 -0
  8. data/.ruby-version +1 -0
  9. data/CHANGELOG.md +9 -0
  10. data/CODE_OF_CONDUCT.md +77 -0
  11. data/Dockerfile +23 -0
  12. data/Gemfile +6 -0
  13. data/Gemfile.lock +165 -0
  14. data/Guardfile +22 -0
  15. data/LICENSE.md +195 -0
  16. data/README.md +742 -0
  17. data/Rakefile +13 -0
  18. data/bin/deimos +4 -0
  19. data/deimos-kafka.gemspec +42 -0
  20. data/docker-compose.yml +71 -0
  21. data/docs/DATABASE_BACKEND.md +147 -0
  22. data/docs/PULL_REQUEST_TEMPLATE.md +34 -0
  23. data/lib/deimos.rb +134 -0
  24. data/lib/deimos/active_record_consumer.rb +81 -0
  25. data/lib/deimos/active_record_producer.rb +64 -0
  26. data/lib/deimos/avro_data_coder.rb +89 -0
  27. data/lib/deimos/avro_data_decoder.rb +36 -0
  28. data/lib/deimos/avro_data_encoder.rb +51 -0
  29. data/lib/deimos/backends/db.rb +27 -0
  30. data/lib/deimos/backends/kafka.rb +27 -0
  31. data/lib/deimos/backends/kafka_async.rb +27 -0
  32. data/lib/deimos/configuration.rb +88 -0
  33. data/lib/deimos/consumer.rb +164 -0
  34. data/lib/deimos/instrumentation.rb +71 -0
  35. data/lib/deimos/kafka_message.rb +27 -0
  36. data/lib/deimos/kafka_source.rb +126 -0
  37. data/lib/deimos/kafka_topic_info.rb +79 -0
  38. data/lib/deimos/message.rb +74 -0
  39. data/lib/deimos/metrics/datadog.rb +47 -0
  40. data/lib/deimos/metrics/mock.rb +39 -0
  41. data/lib/deimos/metrics/provider.rb +38 -0
  42. data/lib/deimos/monkey_patches/phobos_cli.rb +35 -0
  43. data/lib/deimos/monkey_patches/phobos_producer.rb +51 -0
  44. data/lib/deimos/monkey_patches/ruby_kafka_heartbeat.rb +85 -0
  45. data/lib/deimos/monkey_patches/schema_store.rb +19 -0
  46. data/lib/deimos/producer.rb +218 -0
  47. data/lib/deimos/publish_backend.rb +30 -0
  48. data/lib/deimos/railtie.rb +8 -0
  49. data/lib/deimos/schema_coercer.rb +108 -0
  50. data/lib/deimos/shared_config.rb +59 -0
  51. data/lib/deimos/test_helpers.rb +356 -0
  52. data/lib/deimos/tracing/datadog.rb +35 -0
  53. data/lib/deimos/tracing/mock.rb +40 -0
  54. data/lib/deimos/tracing/provider.rb +31 -0
  55. data/lib/deimos/utils/db_producer.rb +95 -0
  56. data/lib/deimos/utils/executor.rb +117 -0
  57. data/lib/deimos/utils/inline_consumer.rb +144 -0
  58. data/lib/deimos/utils/lag_reporter.rb +182 -0
  59. data/lib/deimos/utils/platform_schema_validation.rb +0 -0
  60. data/lib/deimos/utils/signal_handler.rb +68 -0
  61. data/lib/deimos/version.rb +5 -0
  62. data/lib/generators/deimos/db_backend/templates/migration +24 -0
  63. data/lib/generators/deimos/db_backend/templates/rails3_migration +30 -0
  64. data/lib/generators/deimos/db_backend_generator.rb +48 -0
  65. data/lib/tasks/deimos.rake +17 -0
  66. data/spec/active_record_consumer_spec.rb +81 -0
  67. data/spec/active_record_producer_spec.rb +107 -0
  68. data/spec/avro_data_decoder_spec.rb +18 -0
  69. data/spec/avro_data_encoder_spec.rb +37 -0
  70. data/spec/backends/db_spec.rb +35 -0
  71. data/spec/backends/kafka_async_spec.rb +11 -0
  72. data/spec/backends/kafka_spec.rb +11 -0
  73. data/spec/consumer_spec.rb +169 -0
  74. data/spec/deimos_spec.rb +117 -0
  75. data/spec/kafka_source_spec.rb +168 -0
  76. data/spec/kafka_topic_info_spec.rb +88 -0
  77. data/spec/phobos.bad_db.yml +73 -0
  78. data/spec/phobos.yml +73 -0
  79. data/spec/producer_spec.rb +397 -0
  80. data/spec/publish_backend_spec.rb +10 -0
  81. data/spec/schemas/com/my-namespace/MySchema-key.avsc +13 -0
  82. data/spec/schemas/com/my-namespace/MySchema.avsc +18 -0
  83. data/spec/schemas/com/my-namespace/MySchemaWithBooleans.avsc +18 -0
  84. data/spec/schemas/com/my-namespace/MySchemaWithDateTimes.avsc +33 -0
  85. data/spec/schemas/com/my-namespace/MySchemaWithId.avsc +28 -0
  86. data/spec/schemas/com/my-namespace/MySchemaWithUniqueId.avsc +32 -0
  87. data/spec/schemas/com/my-namespace/Widget.avsc +27 -0
  88. data/spec/schemas/com/my-namespace/WidgetTheSecond.avsc +27 -0
  89. data/spec/spec_helper.rb +207 -0
  90. data/spec/updateable_schema_store_spec.rb +36 -0
  91. data/spec/utils/db_producer_spec.rb +208 -0
  92. data/spec/utils/executor_spec.rb +42 -0
  93. data/spec/utils/lag_reporter_spec.rb +69 -0
  94. data/spec/utils/platform_schema_validation_spec.rb +0 -0
  95. data/spec/utils/signal_handler_spec.rb +16 -0
  96. data/support/deimos-solo.png +0 -0
  97. data/support/deimos-with-name-next.png +0 -0
  98. data/support/deimos-with-name.png +0 -0
  99. data/support/flipp-logo.png +0 -0
  100. metadata +452 -0
# frozen_string_literal: true

require 'deimos/consumer'

module Deimos
  # Consumer that automatically saves the payload into the database.
  class ActiveRecordConsumer < Consumer
    class << self
      # Configure the model class consumed messages are saved to.
      # @param klass [Class < ActiveRecord::Base] the class used to save to the
      # database.
      def record_class(klass)
        config[:record_class] = klass
      end
    end

    # Save the record keyed by the message key, or destroy it when the
    # payload is nil (tombstone).
    # :nodoc:
    def consume(payload, metadata)
      message_key = metadata.with_indifferent_access[:key]
      model = self.class.config[:record_class]
      record = model.where(model.primary_key => message_key).first
      if payload.nil?
        destroy_record(record)
        return
      end

      record ||= model.new
      attrs = record_attributes(payload.with_indifferent_access)
      # don't use attributes= - bypass Rails < 5 attr_protected
      attrs.each_pair do |name, value|
        record.send("#{name}=", value)
      end
      record[model.primary_key] = message_key
      record.created_at ||= Time.zone.now if record.respond_to?(:created_at)
      record.updated_at ||= Time.zone.now if record.respond_to?(:updated_at)
      record.save!
    end

    # Destroy a record that received a null payload. Override if you need
    # to do something other than a straight destroy (e.g. mark as archived).
    # @param record [ActiveRecord::Base]
    def destroy_record(record)
      record&.destroy
    end

    # Override this method (with `super`) if you want to add/change the default
    # attributes set to the new/existing record. Only schema fields that match
    # a database column are kept; created_at/updated_at are managed separately.
    # @param payload [Hash]
    # @return [Hash]
    def record_attributes(payload)
      model = self.class.config[:record_class]
      schema = self.class.decoder.avro_schema
      schema.fields.each_with_object({}) do |field, attrs|
        next if %w(updated_at created_at).include?(field.name)

        column = model.columns.find { |c| c.name == field.name }
        next if column.nil?

        attrs[field.name] = _coerce_field(field, column, payload[field.name])
      end
    end

    private

    # Coerce an Avro field value into something the column accepts. Unions
    # are unwrapped to their non-null branch; int/long values destined for
    # datetime columns are treated as Unix timestamps.
    # @param field [Avro::Schema]
    # @param column [ActiveRecord::ConnectionAdapters::Column]
    # @param val [Object]
    def _coerce_field(field, column, val)
      return nil if val.nil?

      field_type = field.type.type.to_sym
      if field_type == :union
        field_type = field.type.schemas.map { |s| s.type.to_sym }.
          find { |t| t != :null }
      end
      if column.type == :datetime && %i(int long).include?(field_type)
        return Time.zone.strptime(val.to_s, '%s')
      end

      val
    end
  end
end
# frozen_string_literal: true

require 'deimos/producer'

module Deimos
  # Class which automatically produces a record when given an ActiveRecord
  # instance or a list of them. Just call `send_events` on a list of records
  # and they will be auto-published. You can override `generate_payload`
  # to make changes to the payload before it's published.
  #
  # You can also call this with a list of hashes representing attributes.
  # This is common when using activerecord-import.
  class ActiveRecordProducer < Producer
    class << self
      # Indicate the class this producer is working on.
      # @param klass [Class]
      # @param refetch [Boolean] if true, and we are given a hash instead of
      # a record object, refetch the record to pass into the `generate_payload`
      # method.
      def record_class(klass, refetch: true)
        config[:record_class] = klass
        config[:refetch_record] = refetch
      end

      # Publish a single record.
      # @param record [ActiveRecord::Base]
      # @param force_send [Boolean]
      def send_event(record, force_send: false)
        send_events([record], force_send: force_send)
      end

      # Publish a list of records or attribute hashes.
      # @param records [Array<ActiveRecord::Base>]
      # @param force_send [Boolean]
      def send_events(records, force_send: false)
        pk = config[:record_class]&.primary_key
        messages = records.map do |record|
          if record.respond_to?(:attributes)
            attrs = record.attributes.with_indifferent_access
          else
            attrs = record.with_indifferent_access
            # optionally load the full record so generate_payload can use it
            if config[:refetch_record] && attrs[pk]
              record = config[:record_class].find(attrs[pk])
            end
          end
          generate_payload(attrs, record).with_indifferent_access
        end
        publish_list(messages, force_send: force_send)
      end

      # Generate the payload, given a list of attributes or a record.
      # Can be overridden or added to by subclasses. Attributes that are not
      # schema fields (other than :payload_key) are stripped out.
      # @param attributes [Hash]
      # @param _record [ActiveRecord::Base] May be nil if refetch_record
      # is not set.
      # @return [Hash]
      def generate_payload(attributes, _record)
        field_names = encoder.avro_schema.fields.map(&:name)
        payload = attributes.stringify_keys
        payload.delete_if do |key, _|
          key.to_sym != :payload_key && !field_names.include?(key)
        end
      end
    end
  end
end
# frozen_string_literal: true

module Deimos
  # Base class for the encoder / decoder classes.
  class AvroDataCoder
    attr_accessor :schema, :namespace, :config, :schema_store

    # @param schema [String]
    # @param namespace [String]
    # @param schema_store [AvroTurf::SchemaStore]
    def initialize(schema:, namespace:, schema_store: nil)
      @schema = schema
      @namespace = namespace
      @schema_store = schema_store ||
        AvroTurf::SchemaStore.new(path: Deimos.config.schema_path)
    end

    # Look up a schema in the store, defaulting to this coder's own schema.
    # @param schema [String]
    # @return [Avro::Schema]
    def avro_schema(schema=nil)
      @schema_store.find(schema || @schema, @namespace)
    end

    private

    # Lazily build the AvroTurf instance used for local (registry-less)
    # encoding and decoding.
    # @return [AvroTurf]
    def avro_turf
      @avro_turf ||= AvroTurf.new(
        schemas_path: Deimos.config.schema_path,
        schema_store: @schema_store
      )
    end

    # Lazily build the schema-registry-aware messaging client.
    # @return [AvroTurf::Messaging]
    def avro_turf_messaging
      @avro_turf_messaging ||= AvroTurf::Messaging.new(
        schema_store: @schema_store,
        registry_url: Deimos.config.schema_registry_url,
        schemas_path: Deimos.config.schema_path,
        namespace: @namespace
      )
    end

    # Generate a key schema from the given value schema and key ID. This
    # is used when encoding or decoding keys from an existing value schema.
    # The result is memoized and registered with the schema store.
    # @param key_id [Symbol]
    # @return [Hash]
    def _generate_key_schema(key_id)
      return @key_schema if @key_schema

      value_schema = @schema_store.find(@schema, @namespace)
      key_field = value_schema.fields.find { |f| f.name == key_id.to_s }
      @key_schema = {
        'type' => 'record',
        'name' => _key_schema_name(@schema),
        'namespace' => @namespace,
        'doc' => "Key for #{@namespace}.#{@schema}",
        'fields' => [
          {
            'name' => key_id,
            'type' => key_field.type.type_sym.to_s
          }
        ]
      }
      @schema_store.add_schema(@key_schema)
      @key_schema
    end

    # Extract the name of the first field from a schema hash.
    # @param value_schema [Hash]
    # @return [String]
    def _field_name_from_schema(value_schema)
      raise "Schema #{@schema} not found!" if value_schema.nil?
      if value_schema['fields'].nil? || value_schema['fields'].empty?
        raise "Schema #{@schema} has no fields!"
      end

      value_schema['fields'][0]['name']
    end

    # Derive the generated key schema's name from the value schema's name.
    # @param schema [String]
    # @return [String]
    def _key_schema_name(schema)
      "#{schema.gsub('-value', '')}_key"
    end
  end
end
# frozen_string_literal: true

require 'avro_turf/messaging'
require 'deimos/avro_data_coder'

module Deimos
  # Service Object to decode avro messages
  class AvroDataDecoder < AvroDataCoder
    # Decode some data via the schema registry.
    # @param payload [Hash|String]
    # @param schema [String]
    # @return [Hash]
    def decode(payload, schema: nil)
      avro_turf_messaging.decode(payload, schema_name: schema || @schema)
    end

    # Decode against a local schema.
    # @param payload [Hash]
    # @param schema [String]
    # @return [Hash]
    def decode_local(payload, schema: nil)
      avro_turf.decode(payload,
                       schema_name: schema || @schema,
                       namespace: @namespace)
    end

    # Decode an encoded message key using a schema generated from the value
    # schema's key field.
    # @param payload [String] the encoded key.
    # @param key_id [String|Symbol]
    # @return [Object] the decoded key (int/long/string).
    def decode_key(payload, key_id)
      key_schema = _generate_key_schema(key_id)
      field_name = _field_name_from_schema(key_schema)
      decode(payload, schema: key_schema['name'])[field_name]
    end
  end
end
# frozen_string_literal: true

require 'avro_turf/messaging'
require 'deimos/avro_data_coder'

module Deimos
  # Service Object to encode Avro messages.
  class AvroDataEncoder < AvroDataCoder
    # Encode a payload against a local schema (no schema registry).
    # @param payload [Hash]
    # @param schema [String]
    # @return [String]
    def encode_local(payload, schema: nil)
      schema ||= @schema
      Avro::SchemaValidator.validate!(avro_schema(schema), payload,
                                      recursive: true,
                                      fail_on_extra_fields: true)
      avro_turf.encode(payload, schema_name: schema, namespace: @namespace)
    rescue Avro::IO::AvroTypeError
      # Re-validate to throw a more detailed error. If this shallow
      # validation happens to pass, re-raise the original error rather than
      # silently returning a non-string validation result.
      value_schema = @schema_store.find(schema, @namespace)
      Avro::SchemaValidator.validate!(value_schema, payload)
      raise
    end

    # Encode a payload via the schema registry.
    # @param payload [Hash]
    # @param schema [String]
    # @param topic [String]
    # @return [String]
    def encode(payload, schema: nil, topic: nil)
      schema ||= @schema
      Avro::SchemaValidator.validate!(avro_schema(schema), payload,
                                      recursive: true,
                                      fail_on_extra_fields: true)
      avro_turf_messaging.encode(payload, schema_name: schema, subject: topic)
    rescue Avro::IO::AvroTypeError
      # Re-validate to throw a more detailed error. Use the locally resolved
      # schema (previously this always looked up @schema, ignoring an
      # explicit schema: override), and re-raise the original error if the
      # shallow validation passes.
      value_schema = @schema_store.find(schema, @namespace)
      Avro::SchemaValidator.validate!(value_schema, payload)
      raise
    end

    # Encode a message key using a schema generated from the value schema's
    # key field.
    # @param key_id [Symbol|String]
    # @param key [Object]
    # @param topic [String]
    # @return [String] the encoded key.
    def encode_key(key_id, key, topic=nil)
      key_schema = _generate_key_schema(key_id)
      field_name = _field_name_from_schema(key_schema)
      payload = { field_name => key }
      encode(payload, schema: key_schema['name'], topic: topic)
    end
  end
end
# frozen_string_literal: true

require 'deimos/kafka_message'

module Deimos
  module Backends
    # Backend which saves messages to the database instead of immediately
    # sending them.
    class Db < Deimos::PublishBackend
      class << self
        # Convert each message into a KafkaMessage row and bulk-import them.
        # :nodoc:
        def execute(producer_class:, messages:)
          keyed = !producer_class.config[:no_keys]
          records = messages.map do |m|
            record = Deimos::KafkaMessage.new(
              message: m.encoded_payload.to_s.b,
              topic: m.topic,
              partition_key: m.partition_key || m.key
            )
            record.key = m.encoded_key.to_s.b if keyed
            record
          end
          Deimos::KafkaMessage.import(records)
        end
      end
    end
  end
end
# frozen_string_literal: true

module Deimos
  module Backends
    # Default backend to produce to Kafka.
    class Kafka < Deimos::PublishBackend
      include Phobos::Producer

      # Publish the encoded messages synchronously, wrapped in a 'produce'
      # instrumentation event, and record a success metric.
      # :nodoc:
      def self.execute(producer_class:, messages:)
        Deimos.instrument('produce',
                          producer: producer_class,
                          topic: producer_class.topic,
                          payloads: messages.map(&:payload)) do
          producer.publish_list(messages.map(&:encoded_hash))
          Deimos.config.metrics&.increment(
            'publish',
            tags: %W(status:success topic:#{producer_class.topic}),
            by: messages.size
          )
        end
      end
    end
  end
end
# frozen_string_literal: true

module Deimos
  module Backends
    # Backend which produces to Kafka via an async producer.
    class KafkaAsync < Deimos::PublishBackend
      include Phobos::Producer

      # Publish the encoded messages asynchronously, wrapped in a 'produce'
      # instrumentation event, and record a success metric.
      # :nodoc:
      def self.execute(producer_class:, messages:)
        Deimos.instrument('produce',
                          producer: producer_class,
                          topic: producer_class.topic,
                          payloads: messages.map(&:payload)) do
          producer.async_publish_list(messages.map(&:encoded_hash))
          Deimos.config.metrics&.increment(
            'publish',
            tags: %W(status:success topic:#{producer_class.topic}),
            by: messages.size
          )
        end
      end
    end
  end
end
# frozen_string_literal: true

module Deimos
  # Class to hold configuration.
  class Configuration
    # Loggers for Deimos itself, Phobos, and RubyKafka respectively.
    # @return [Logger]
    attr_accessor :logger, :phobos_logger, :kafka_logger

    # By default, consumer errors will be consumed and logged to
    # the metrics provider.
    # Set this to true to force the error to be raised.
    # @return [Boolean]
    attr_accessor :reraise_consumer_errors

    # @return [String]
    attr_accessor :schema_registry_url

    # @return [String]
    attr_accessor :seed_broker

    # Local path to schemas.
    # @return [String]
    attr_accessor :schema_path

    # Default namespace for all producers. Can remain nil. Individual
    # producers can override.
    # @return [String]
    attr_accessor :producer_schema_namespace

    # Add a prefix to all topic names. This can be useful if you're using
    # the same Kafka broker for different environments that are producing
    # the same topics.
    # @return [String]
    attr_accessor :producer_topic_prefix

    # Disable all actual message producing. Useful when doing things like
    # mass imports or data space management when events don't need to be
    # fired.
    # @return [Boolean]
    attr_accessor :disable_producers

    # File path to the Phobos configuration file, relative to the application root.
    # @return [String]
    attr_accessor :phobos_config_file

    # @return [Boolean]
    attr_accessor :ssl_enabled

    # @return [String]
    attr_accessor :ssl_ca_cert

    # @return [String]
    attr_accessor :ssl_client_cert

    # @return [String]
    attr_accessor :ssl_client_cert_key

    # Currently can be set to :db, :kafka, or :async_kafka. If using Kafka
    # directly, set to async in your user-facing app, and sync in your
    # consumers or delayed workers.
    # @return [Symbol]
    attr_accessor :publish_backend

    # @return [Boolean]
    attr_accessor :report_lag

    # @return [Metrics::Provider]
    attr_accessor :metrics

    # @return [Tracing::Provider]
    attr_accessor :tracer

    # Set configuration defaults.
    # :nodoc:
    def initialize
      @phobos_config_file = 'config/phobos.yml'
      @publish_backend = :kafka_async
    end

    # Determine whether any Phobos-related setting differs between this
    # configuration and another one (which would require reconfiguration).
    # @param other_config [Configuration]
    # @return [Boolean]
    def phobos_config_changed?(other_config)
      %i(phobos_config_file ssl_ca_cert ssl_client_cert ssl_client_cert_key).
        any? { |setting| send(setting) != other_config.send(setting) }
    end
  end
end