rom-kafka 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +9 -0
  4. data/.metrics +9 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +2 -0
  7. data/.travis.yml +34 -0
  8. data/.yardopts +3 -0
  9. data/CHANGELOG.md +3 -0
  10. data/Gemfile +7 -0
  11. data/Guardfile +14 -0
  12. data/LICENSE +21 -0
  13. data/README.md +83 -0
  14. data/Rakefile +34 -0
  15. data/config/metrics/STYLEGUIDE +230 -0
  16. data/config/metrics/cane.yml +5 -0
  17. data/config/metrics/churn.yml +6 -0
  18. data/config/metrics/flay.yml +2 -0
  19. data/config/metrics/metric_fu.yml +14 -0
  20. data/config/metrics/reek.yml +1 -0
  21. data/config/metrics/roodi.yml +24 -0
  22. data/config/metrics/rubocop.yml +71 -0
  23. data/config/metrics/saikuro.yml +3 -0
  24. data/config/metrics/simplecov.yml +6 -0
  25. data/config/metrics/yardstick.yml +37 -0
  26. data/lib/rom-kafka.rb +3 -0
  27. data/lib/rom/kafka.rb +29 -0
  28. data/lib/rom/kafka/brokers.rb +72 -0
  29. data/lib/rom/kafka/brokers/broker.rb +68 -0
  30. data/lib/rom/kafka/connection.rb +22 -0
  31. data/lib/rom/kafka/connection/consumer.rb +105 -0
  32. data/lib/rom/kafka/connection/producer.rb +114 -0
  33. data/lib/rom/kafka/create.rb +75 -0
  34. data/lib/rom/kafka/dataset.rb +132 -0
  35. data/lib/rom/kafka/gateway.rb +165 -0
  36. data/lib/rom/kafka/relation.rb +78 -0
  37. data/lib/rom/kafka/version.rb +13 -0
  38. data/rom-kafka.gemspec +33 -0
  39. data/spec/integration/basic_usage_spec.rb +58 -0
  40. data/spec/integration/keys_usage_spec.rb +34 -0
  41. data/spec/shared/scholars_topic.rb +28 -0
  42. data/spec/spec_helper.rb +20 -0
  43. data/spec/unit/brokers/broker_spec.rb +89 -0
  44. data/spec/unit/brokers_spec.rb +46 -0
  45. data/spec/unit/connection/consumer_spec.rb +90 -0
  46. data/spec/unit/connection/producer_spec.rb +79 -0
  47. data/spec/unit/create_spec.rb +79 -0
  48. data/spec/unit/dataset_spec.rb +165 -0
  49. data/spec/unit/gateway_spec.rb +171 -0
  50. data/spec/unit/relation_spec.rb +96 -0
  51. metadata +219 -0
data/lib/rom/kafka/connection/producer.rb
@@ -0,0 +1,114 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   class Connection
+
+     # The producer-specific connection to a Kafka cluster
+     #
+     # It wraps the `Poseidon::Producer` driver and is responsible for
+     # adapting the poseidon API to ROM::Gateway via the [#initialize]
+     # and [#publish] methods.
+     #
+     # The ROM::Kafka producer deals with tuples, hiding the poseidon-specific
+     # implementation of messages from the rest of the gem.
+     #
+     # @api private
+     #
+     class Producer < Connection
+
+       # The 'poseidon' class describing a producer
+       #
+       # @return [Class]
+       #
+       DRIVER = Poseidon::Producer
+
+       # The 'poseidon' class describing a message acceptable by the producer
+       #
+       # @return [Class]
+       #
+       MESSAGE = Poseidon::MessageToSend
+
+       # Attributes accepted by the `Poseidon::Producer` driver
+       attribute :partitioner
+       attribute :type, default: :sync
+       attribute :compression_codec
+       attribute :metadata_refresh_interval_ms
+       attribute :max_send_retries
+       attribute :retry_backoff_ms
+       attribute :required_acks
+       attribute :ack_timeout_ms
+       attribute :socket_timeout_ms
+
+       # @!attribute [r] connection
+       #
+       # @return [ROM::Kafka::Connection::Producer::DRIVER] driver to Kafka
+       #
+       attr_reader :connection
+
+       # Initializes a producer connection
+       #
+       # The initializer is attributes-agnostic. This means it doesn't
+       # validate attributes, but skips unused ones.
+       #
+       # @option options [#to_s] :client_id
+       #   A required unique id used to identify the Kafka client.
+       # @option options [Array<String>] :brokers
+       #   A list of seed brokers to find a lead broker to fetch messages from.
+       # @option options [Proc, nil] :partitioner
+       #   A proc used to provide a partition from a given key.
+       # @option options [:gzip, :snappy, nil] :compression_codec (nil)
+       #   Type of compression to be used.
+       # @option options [Integer] :metadata_refresh_interval_ms (600_000)
+       #   How frequently the topic metadata should be updated (in milliseconds).
+       # @option options [Integer] :max_send_retries (3)
+       #   Number of times to retry sending messages to a leader.
+       # @option options [Integer] :retry_backoff_ms (100)
+       #   An amount of time (in milliseconds) to wait before refreshing
+       #   the metadata after we are unable to send messages.
+       # @option options [Integer] :required_acks (0)
+       #   The number of acks required per request.
+       # @option options [Integer] :ack_timeout_ms (1_500)
+       #   How long the producer waits for acks.
+       # @option options [Integer] :socket_timeout_ms (10_000)
+       #   How long the producer/consumer socket waits for any reply from the server.
+       #
+       def initialize(options)
+         super # takes declared attributes only, skipping brokers and client_id
+         brokers = options.fetch(:brokers)
+         client = options.fetch(:client_id)
+         @connection = DRIVER.new(brokers, client, attributes)
+       end
+
+       # Sends tuples to the underlying connection
+       #
+       # Stringifies non-empty hash values to conform to the 'poseidon' API.
+       #
+       # @param [Array<Hash>] data
+       #
+       # @return [Array<Hash{Symbol => String, nil}>]
+       #   The list of published tuples
+       #
+       def publish(*data)
+         tuples = data.flatten.map(&method(:stringify_keys))
+         @connection.send_messages tuples.map(&method(:message))
+
+         tuples
+       end
+
+       private
+
+       def stringify_keys(tuple)
+         keys = [:value, :topic, :key]
+         Hash[keys.zip(tuple.values_at(*keys).map { |v| v.to_s if v })]
+       end
+
+       def message(tuple)
+         MESSAGE.new(*tuple.values_at(:topic, :value, :key))
+       end
+
+     end # class Producer
+
+   end # class Connection
+
+ end # module ROM::Kafka
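
A minimal sketch of driving this connection by hand, assuming a broker reachable at localhost:9092 and the poseidon gem installed (in normal use the gateway builds the producer; the attribute values here are illustrative):

  require "rom-kafka"

  producer = ROM::Kafka::Connection::Producer.new(
    client_id: "my_client",        # required, identifies this Kafka client
    brokers: ["localhost:9092"],   # seed brokers (assumed address)
    required_acks: 1               # wait for the leader to acknowledge
  )

  # #publish stringifies the :value/:topic/:key entries and returns the tuples
  producer.publish(value: "Hi!", topic: "greetings", key: "hello")
  # => [{ value: "Hi!", topic: "greetings", key: "hello" }]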
data/lib/rom/kafka/create.rb
@@ -0,0 +1,75 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # The namespace for Kafka-specific ROM commands
+   #
+   module Commands
+
+     # The Kafka-specific implementation of ROM::Commands::Create
+     #
+     # @example
+     #   ROM.use(:auto_registration)
+     #   ROM.setup(:kafka, "localhost:9092")
+     #
+     #   class Users < ROM::Relation[:kafka]
+     #     dataset :users
+     #   end
+     #
+     #   class GreetUsers < ROM::Commands::Create[:kafka]
+     #     relation :users
+     #     register_as :greet
+     #   end
+     #
+     #   rom = ROM.finalize.env
+     #   greet = rom.commands(:users).greet
+     #   greet.with(key: "greetings").call "Hi!"
+     #   # => [{ value: "Hi!", topic: "users", key: "greetings" }]
+     #
+     class Create < ROM::Commands::Create
+
+       adapter :kafka
+       option :key, reader: true
+
+       # Sends messages to the current topic/partition of Kafka
+       #
+       # @param [#to_s, Array<#to_s>] messages
+       #
+       # @return [Array<Hash>]
+       #
+       def execute(*messages)
+         tuples = messages.flatten.map(&method(:tuple))
+         producer.publish(*tuples)
+       end
+
+       # Returns a new command with the `:key` option updated
+       #
+       # @param [Hash] options
+       # @option options [Object] :key
+       #   The key used by Kafka to define a partition
+       #
+       # @return [ROM::Kafka::Commands::Create]
+       #
+       def with(options)
+         self.class.new relation, key: options.fetch(:key)
+       end
+
+       private
+
+       def producer
+         dataset.producer
+       end
+
+       def dataset
+         relation.dataset
+       end
+
+       def tuple(text)
+         { value: text.to_s, topic: dataset.topic, key: key }
+       end
+
+     end # class Create
+
+   end # module Commands
+
+ end # module ROM::Kafka
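
A sketch of the `#with` rekeying shown above, continuing the class-level @example (names like `rom` and `greet` come from that example; the output assumes the same `users` topic):

  greet = rom.commands(:users).greet   # key is nil until set
  morning = greet.with(key: "morning") # a new command; the original is untouched
  morning.call("Good morning!")
  # => [{ value: "Good morning!", topic: "users", key: "morning" }]

Because `#with` returns a fresh command rather than mutating state, one registered command can safely target several partition keys.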
data/lib/rom/kafka/dataset.rb
@@ -0,0 +1,132 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # The dataset describes a Kafka topic
+   #
+   # @api private
+   #
+   class Dataset
+
+     extend AttributesDSL
+     include Enumerable
+
+     # Customizable attributes for a consumer connection
+     attribute :partition, default: 0
+     attribute :offset, default: 0
+     attribute :limit, default: 0
+     attribute :min_bytes
+     attribute :max_bytes
+     attribute :max_wait_ms
+
+     # @!attribute [r] gateway
+     #
+     # @return [ROM::Kafka::Gateway]
+     #   The back reference to the gateway that provided the dataset
+     #
+     attr_reader :gateway
+
+     # @!attribute [r] topic
+     #
+     # @return [String] The name of the topic described by the dataset
+     #
+     attr_reader :topic
+
+     # @!attribute [r] producer
+     #
+     # @return [ROM::Kafka::Connection::Producer]
+     #   The producer connection to Kafka brokers, defined by a gateway.
+     #   It is stored to be used by a `Create` command.
+     #
+     attr_reader :producer
+
+     # @!attribute [r] consumer
+     #
+     # @return [ROM::Kafka::Connection::Consumer]
+     #   The consumer connection to Kafka brokers, used to fetch messages
+     #   via the [#each] method call.
+     #
+     attr_reader :consumer
+
+     # Initializes the dataset with a gateway and topic name
+     #
+     # Attributes are taken from the gateway by default. Later you can create
+     # a new dataset for the same gateway and topic, but with attributes
+     # updated via the [#using] method.
+     #
+     # @param [ROM::Kafka::Gateway] gateway
+     # @param [String] topic
+     #
+     # @option options [Integer] :partition (0)
+     #   A partition number to fetch messages from.
+     # @option options [Integer] :offset (0)
+     #   An initial offset to start fetching from.
+     # @option options [Integer] :min_bytes (1)
+     #   The smallest amount of data the server should send
+     #   (by default, send data as soon as it is ready).
+     # @option options [Integer] :max_bytes (1_048_576)
+     #   The maximum number of bytes to fetch by the consumer (1MB by default).
+     # @option options [Integer] :max_wait_ms (100)
+     #   How long to block until the server sends data.
+     #   NOTE: This is only enforced if min_bytes is > 0.
+     #
+     def initialize(gateway, topic, options = {})
+       super gateway.attributes.merge(options)
+       @topic = topic.to_s
+       @gateway = gateway
+       @producer = gateway.producer
+       @consumer = prepare_consumer
+     end
+
+     # Returns a new dataset with updated consumer attributes
+     #
+     # @param [Hash] options The attributes to be updated
+     #
+     # @return [ROM::Kafka::Dataset]
+     #
+     def using(options)
+       self.class.new(gateway, topic, attributes.merge(options))
+     end
+
+     # Returns the enumerator to iterate over tuples fetched from the [#consumer]
+     #
+     # If a `limit` of messages is set, the iterator stops after reaching it.
+     #
+     # @param [Proc] block
+     #
+     # @yieldparam [Hash] tuple
+     #
+     # @return [Enumerator<Hash{Symbol => String, Integer}>]
+     #
+     def each(&block)
+       return to_enum unless block_given?
+       limit.equal?(0) ? unlimited_each(&block) : limited_each(&block)
+     end
+
+     private
+
+     def prepare_consumer
+       Connection::Consumer.new consumer_options
+     end
+
+     def consumer_options
+       attributes.merge(
+         topic: topic,
+         client_id: gateway.client_id,
+         brokers: gateway.brokers
+       )
+     end
+
+     def unlimited_each
+       enum = consumer.each
+       loop { yield(enum.next) }
+     end
+
+     def limited_each
+       enum = consumer.each
+       limit.times { yield(enum.next) }
+     end
+
+   end # class Dataset
+
+ end # module ROM::Kafka
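
A sketch of composing `#using` and `#each`, assuming a `gateway` built as in the Gateway examples below and a `logs` topic with messages available (the `:value` key in the yielded tuple is an assumption based on the tuple shape used elsewhere in the gem):

  dataset = ROM::Kafka::Dataset.new(gateway, :logs)

  # `using` returns a new dataset whose consumer is rebuilt with the
  # merged attributes; the original dataset is left untouched
  recent = dataset.using(offset: 10, limit: 5)

  recent.each { |tuple| puts tuple[:value] } # stops after 5 messages

With the default `limit: 0`, iteration instead continues until the consumer's enumerator is exhausted (`loop` stops on `StopIteration`).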
data/lib/rom/kafka/gateway.rb
@@ -0,0 +1,165 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # Describes the gateway to Kafka
+   #
+   # The gateway has 3 responsibilities:
+   # - registers the datasets describing various topics and partitions
+   # - instantiates the producer connection to Kafka brokers, which doesn't
+   #   depend on specific topic/partition settings
+   # - stores settings for the consumer connections to Kafka, which
+   #   depend on a specific topic/partition/offset
+   #
+   # Every dataset uses the same producer connection (defined by the gateway)
+   # and an individual consumer one. The consumer connection is reloaded
+   # every time the topic, partition or current offset is changed by a relation.
+   #
+   class Gateway < ROM::Gateway
+
+     extend AttributesDSL
+
+     # Attributes used by both producer and consumer
+     attribute :client_id, required: true, &:to_s
+     attribute :brokers
+
+     # Producer-specific attributes
+     attribute :partitioner
+     attribute :compression_codec
+     attribute :metadata_refresh_interval_ms, default: 600_000
+     attribute :max_send_retries, default: 3
+     attribute :retry_backoff_ms, default: 100
+     attribute :required_acks, default: 0
+     attribute :ack_timeout_ms, default: 1_500
+     attribute :socket_timeout_ms, default: 10_000
+
+     # Consumer-specific attributes
+     attribute :min_bytes, default: 1
+     attribute :max_bytes, default: 1_048_576
+     attribute :max_wait_ms, default: 100
+
+     # Initializes the gateway to Kafka broker(s).
+     #
+     # The initializer is attributes-agnostic. This means it doesn't
+     # validate attributes, but skips unused ones.
+     #
+     # @example Initialize a producer's gateway to Kafka
+     #   gateway = Gateway.new(
+     #     hosts: ["127.0.0.1", "127.0.0.2:9093"],
+     #     port: 9092,
+     #     client_id: :my_user,
+     #     compression_codec: :gzip
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @example Alternative syntax
+     #   gateway = Gateway.new(
+     #     "127.0.0.1:9092",
+     #     "127.0.0.2:9093",
+     #     client_id: :my_user,
+     #     compression_codec: :gzip
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @example Mixed syntax
+     #   gateway = Gateway.new(
+     #     "127.0.0.1:9092",
+     #     hosts: ["127.0.0.2"],
+     #     port: 9093,
+     #     client_id: :my_user,
+     #     min_bytes: 1024 # wait until 1KB of messages is prepared
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @param [nil, String, Array<String>] addresses
+     #   The address(es) of broker(s) to connect to (optional).
+     #   Brokers can alternatively be set with the `:hosts` and `:port` options.
+     #
+     # @option options [#to_s] :client_id
+     #   A required unique id used to identify the Kafka client.
+     # @option options [String, Array<String>] :hosts
+     #   A host or list of hosts in the form "host1:port1" or "host1".
+     #   In case of a consumer, only the first host is actually used.
+     # @option options [Integer] :port
+     #   The port shared by all hosts.
+     #
+     # @option options [Proc, nil] :partitioner
+     #   A proc used to provide a partition from a given key.
+     # @option options [:gzip, :snappy, nil] :compression_codec (nil)
+     #   Type of compression to be used.
+     # @option options [Integer] :metadata_refresh_interval_ms (600_000)
+     #   How frequently the topic metadata should be updated (in milliseconds).
+     # @option options [Integer] :max_send_retries (3)
+     #   Number of times to retry sending messages to a leader.
+     # @option options [Integer] :retry_backoff_ms (100)
+     #   An amount of time (in milliseconds) to wait before refreshing
+     #   the metadata after we are unable to send messages.
+     # @option options [Integer] :required_acks (0)
+     #   The number of acks required per request.
+     # @option options [Integer] :ack_timeout_ms (1_500)
+     #   How long the producer waits for acks.
+     # @option options [Integer] :socket_timeout_ms (10_000)
+     #   How long the producer/consumer socket waits for any reply from the server.
+     #
+     # @option options [Integer] :offset
+     #   An initial offset to start fetching from.
+     # @option options [Integer] :min_bytes (1)
+     #   The smallest amount of data the server should send
+     #   (by default, send data as soon as it is ready).
+     # @option options [Integer] :max_bytes (1_048_576)
+     #   The maximum number of bytes to fetch by the consumer (1MB by default).
+     # @option options [Integer] :max_wait_ms (100)
+     #   How long to block until the server sends data.
+     #   NOTE: This is only enforced if min_bytes is > 0.
+     #
+     def initialize(*addresses)
+       options = Hash[addresses.pop]
+       brokers = Brokers.new(addresses, options).to_a
+       super options.merge(brokers: brokers) # prepares #attributes
+
+       @producer = Connection::Producer.new(attributes)
+       @datasets = {}
+     end
+
+     # @!attribute [r] producer
+     #
+     # @return [ROM::Kafka::Connection::Producer]
+     #   The producer's connection to Kafka brokers
+     #
+     attr_reader :producer
+
+     # Returns the registered dataset by topic
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [ROM::Kafka::Dataset]
+     #
+     def [](topic)
+       @datasets[topic.to_sym]
+     end
+
+     # Registers the dataset by topic
+     #
+     # By default the dataset is registered with partition 0 and offset 0.
+     # These settings can be changed from either a relation or a command.
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [ROM::Kafka::Dataset] the registered dataset
+     #
+     def dataset(topic)
+       @datasets[topic.to_sym] ||= Dataset.new(self, topic)
+     end
+
+     # Checks whether a dataset is registered by topic
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [Boolean]
+     #
+     def dataset?(topic)
+       self[topic] ? true : false
+     end
+
+   end # class Gateway
+
+ end # module ROM::Kafka
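
A sketch of the dataset-registry methods above (the broker address is illustrative):

  gateway = ROM::Kafka::Gateway.new("localhost:9092", client_id: :my_user)

  gateway.dataset?(:users) # => false
  gateway.dataset(:users)  # registers and memoizes a Dataset for the topic
  gateway.dataset?(:users) # => true
  gateway[:users]          # => the memoized ROM::Kafka::Dataset instance

Repeated `dataset(:users)` calls return the same instance thanks to the `||=` memoization, so a topic is never registered twice.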