rom-kafka 0.0.1

Files changed (51)
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +9 -0
  4. data/.metrics +9 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +2 -0
  7. data/.travis.yml +34 -0
  8. data/.yardopts +3 -0
  9. data/CHANGELOG.md +3 -0
  10. data/Gemfile +7 -0
  11. data/Guardfile +14 -0
  12. data/LICENSE +21 -0
  13. data/README.md +83 -0
  14. data/Rakefile +34 -0
  15. data/config/metrics/STYLEGUIDE +230 -0
  16. data/config/metrics/cane.yml +5 -0
  17. data/config/metrics/churn.yml +6 -0
  18. data/config/metrics/flay.yml +2 -0
  19. data/config/metrics/metric_fu.yml +14 -0
  20. data/config/metrics/reek.yml +1 -0
  21. data/config/metrics/roodi.yml +24 -0
  22. data/config/metrics/rubocop.yml +71 -0
  23. data/config/metrics/saikuro.yml +3 -0
  24. data/config/metrics/simplecov.yml +6 -0
  25. data/config/metrics/yardstick.yml +37 -0
  26. data/lib/rom-kafka.rb +3 -0
  27. data/lib/rom/kafka.rb +29 -0
  28. data/lib/rom/kafka/brokers.rb +72 -0
  29. data/lib/rom/kafka/brokers/broker.rb +68 -0
  30. data/lib/rom/kafka/connection.rb +22 -0
  31. data/lib/rom/kafka/connection/consumer.rb +105 -0
  32. data/lib/rom/kafka/connection/producer.rb +114 -0
  33. data/lib/rom/kafka/create.rb +75 -0
  34. data/lib/rom/kafka/dataset.rb +132 -0
  35. data/lib/rom/kafka/gateway.rb +165 -0
  36. data/lib/rom/kafka/relation.rb +78 -0
  37. data/lib/rom/kafka/version.rb +13 -0
  38. data/rom-kafka.gemspec +33 -0
  39. data/spec/integration/basic_usage_spec.rb +58 -0
  40. data/spec/integration/keys_usage_spec.rb +34 -0
  41. data/spec/shared/scholars_topic.rb +28 -0
  42. data/spec/spec_helper.rb +20 -0
  43. data/spec/unit/brokers/broker_spec.rb +89 -0
  44. data/spec/unit/brokers_spec.rb +46 -0
  45. data/spec/unit/connection/consumer_spec.rb +90 -0
  46. data/spec/unit/connection/producer_spec.rb +79 -0
  47. data/spec/unit/create_spec.rb +79 -0
  48. data/spec/unit/dataset_spec.rb +165 -0
  49. data/spec/unit/gateway_spec.rb +171 -0
  50. data/spec/unit/relation_spec.rb +96 -0
  51. metadata +219 -0
data/lib/rom/kafka/connection/producer.rb
@@ -0,0 +1,114 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   class Connection
+
+     # The producer-specific connection to a Kafka cluster
+     #
+     # It wraps the `Poseidon::Producer` driver and is responsible for
+     # adapting the poseidon API to ROM::Gateway via the [#initialize]
+     # and [#publish] methods.
+     #
+     # The ROM::Kafka producer deals with tuples, hiding the poseidon-specific
+     # implementation of messages from the rest of the gem.
+     #
+     # @api private
+     #
+     class Producer < Connection
+
+       # The 'poseidon' class describing a producer
+       #
+       # @return [Class]
+       #
+       DRIVER = Poseidon::Producer
+
+       # The 'poseidon' class describing a message acceptable by the producer
+       #
+       # @return [Class]
+       #
+       MESSAGE = Poseidon::MessageToSend
+
+       # Attributes accepted by the `Poseidon::Producer` driver
+       attribute :partitioner
+       attribute :type, default: :sync
+       attribute :compression_codec
+       attribute :metadata_refresh_interval_ms
+       attribute :max_send_retries
+       attribute :retry_backoff_ms
+       attribute :required_acks
+       attribute :ack_timeout_ms
+       attribute :socket_timeout_ms
+
+       # @!attribute [r] connection
+       #
+       # @return [ROM::Kafka::Connection::Producer::DRIVER] the driver to Kafka
+       #
+       attr_reader :connection
+
+       # Initializes a producer connection
+       #
+       # The initializer is attributes-agnostic: it doesn't validate
+       # attributes, but skips unused ones.
+       #
+       # @option options [#to_s] :client_id
+       #   A required unique id used to identify the Kafka client.
+       # @option options [Array<String>] :brokers
+       #   A list of seed brokers to find a lead broker to fetch messages from.
+       # @option options [Proc, nil] :partitioner
+       #   A proc used to provide a partition from a given key.
+       # @option options [:gzip, :snappy, nil] :compression_codec (nil)
+       #   The type of compression to be used.
+       # @option options [Integer] :metadata_refresh_interval_ms (600_000)
+       #   How frequently the topic metadata should be updated (in milliseconds).
+       # @option options [Integer] :max_send_retries (3)
+       #   The number of times to retry sending messages to a leader.
+       # @option options [Integer] :retry_backoff_ms (100)
+       #   The amount of time (in milliseconds) to wait before refreshing
+       #   the metadata after we are unable to send messages.
+       # @option options [Integer] :required_acks (0)
+       #   The number of acks required per request.
+       # @option options [Integer] :ack_timeout_ms (1_500)
+       #   How long the producer waits for acks.
+       # @option options [Integer] :socket_timeout_ms (10_000)
+       #   How long the producer/consumer socket waits for any reply from the server.
+       #
+       def initialize(options)
+         super # takes declared attributes only, skipping brokers and client_id
+         brokers = options.fetch(:brokers)
+         client  = options.fetch(:client_id)
+         @connection = DRIVER.new(brokers, client, attributes)
+       end
+
+       # Sends tuples to the underlying connection
+       #
+       # Stringifies non-empty hash values to conform to the 'poseidon' API.
+       #
+       # @param [Array<Hash>] data
+       #
+       # @return [Array<Hash{Symbol => String, nil}>]
+       #   The list of published tuples
+       #
+       def publish(*data)
+         tuples = data.flatten.map(&method(:stringify_keys))
+         @connection.send_messages tuples.map(&method(:message))
+
+         tuples
+       end
+
+       private
+
+       # Stringifies the tuple *values* at the :value, :topic and :key keys
+       def stringify_keys(tuple)
+         keys = [:value, :topic, :key]
+         Hash[keys.zip(tuple.values_at(*keys).map { |v| v.to_s if v })]
+       end
+
+       def message(tuple)
+         MESSAGE.new(*tuple.values_at(:topic, :value, :key))
+       end
+
+     end # class Producer
+
+   end # class Connection
+
+ end # module ROM::Kafka
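
A minimal usage sketch of the class above, assuming a broker is reachable at "localhost:9092" (the topic name "logs" is illustrative). Since the class is marked @api private, in normal use it is reached through the gateway rather than instantiated directly:

    require "rom-kafka"

    producer = ROM::Kafka::Connection::Producer.new(
      client_id: "my_client",
      brokers:   ["localhost:9092"]
    )

    # #publish flattens its arguments, stringifies the tuple values,
    # and sends one Poseidon::MessageToSend per tuple
    producer.publish(value: "Hi!", topic: "logs", key: "greetings")
    # => [{ value: "Hi!", topic: "logs", key: "greetings" }]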
data/lib/rom/kafka/create.rb
@@ -0,0 +1,75 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # The namespace for Kafka-specific ROM commands
+   #
+   module Commands
+
+     # The Kafka-specific implementation of ROM::Commands::Create
+     #
+     # @example
+     #   ROM.use(:auto_registration)
+     #   ROM.setup(:kafka, "localhost:9092")
+     #
+     #   class Users < ROM::Relation[:kafka]
+     #     dataset :users
+     #   end
+     #
+     #   class GreetUsers < ROM::Commands::Create[:kafka]
+     #     relation :users
+     #     register_as :greet
+     #   end
+     #
+     #   rom = ROM.finalize.env
+     #   greet = rom.commands(:users).greet
+     #   greet.with(key: "greetings").call "Hi!"
+     #   # => [{ value: "Hi!", topic: "users", key: "greetings" }]
+     #
+     class Create < ROM::Commands::Create
+
+       adapter :kafka
+       option :key, reader: true
+
+       # Sends messages to the current topic/partition of Kafka
+       #
+       # @param [#to_s, Array<#to_s>] messages
+       #
+       # @return [Array<Hash>]
+       #
+       def execute(*messages)
+         tuples = messages.flatten.map(&method(:tuple))
+         producer.publish(*tuples)
+       end
+
+       # Returns a new command with an updated `:key` option
+       #
+       # @param [Hash] options
+       # @option options [Object] :key
+       #   The key used by Kafka to define a partition
+       #
+       # @return [ROM::Kafka::Commands::Create]
+       #
+       def with(options)
+         self.class.new relation, key: options.fetch(:key)
+       end
+
+       private
+
+       def producer
+         dataset.producer
+       end
+
+       def dataset
+         relation.dataset
+       end
+
+       def tuple(text)
+         { value: text.to_s, topic: dataset.topic, key: key }
+       end
+
+     end # class Create
+
+   end # module Commands
+
+ end # module ROM::Kafka
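
The @example above shows the full setup; as a smaller sketch of the tuple mapping, assuming the GreetUsers command from that example has been registered:

    greet = rom.commands(:users).greet

    # #with builds a fresh command instance carrying a new :key option
    keyed = greet.with(key: "42")

    # #execute flattens its arguments and wraps each message in a tuple
    # before delegating to the dataset's producer
    keyed.call("Hello", "Goodbye")
    # => [{ value: "Hello", topic: "users", key: "42" },
    #     { value: "Goodbye", topic: "users", key: "42" }]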
data/lib/rom/kafka/dataset.rb
@@ -0,0 +1,132 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # The dataset describes a Kafka topic
+   #
+   # @api private
+   #
+   class Dataset
+
+     extend AttributesDSL
+     include Enumerable
+
+     # Customizable attributes for a consumer connection
+     attribute :partition, default: 0
+     attribute :offset, default: 0
+     attribute :limit, default: 0
+     attribute :min_bytes
+     attribute :max_bytes
+     attribute :max_wait_ms
+
+     # @!attribute [r] gateway
+     #
+     # @return [ROM::Kafka::Gateway]
+     #   The back reference to the gateway that provided the dataset
+     #
+     attr_reader :gateway
+
+     # @!attribute [r] topic
+     #
+     # @return [String] The name of the topic described by the dataset
+     #
+     attr_reader :topic
+
+     # @!attribute [r] producer
+     #
+     # @return [ROM::Kafka::Connection::Producer]
+     #   The producer connection to Kafka brokers, defined by a gateway.
+     #   It is stored to be used by a `Create` command.
+     #
+     attr_reader :producer
+
+     # @!attribute [r] consumer
+     #
+     # @return [ROM::Kafka::Connection::Consumer]
+     #   The consumer connection to Kafka brokers, used to fetch messages
+     #   via the [#each] method.
+     #
+     attr_reader :consumer
+
+     # Initializes the dataset with a gateway and topic name
+     #
+     # Attributes default to the gateway's values. Later you can create
+     # a new dataset for the same gateway and topic, but with attributes
+     # updated via the [#using] method.
+     #
+     # @param [ROM::Kafka::Gateway] gateway
+     # @param [String] topic
+     #
+     # @option options [Integer] :partition (0)
+     #   The partition number to fetch messages from.
+     # @option options [Integer] :offset (0)
+     #   The initial offset to start fetching from.
+     # @option options [Integer] :min_bytes (1)
+     #   The smallest amount of data the server should send
+     #   (by default, the server sends data as soon as it is ready).
+     # @option options [Integer] :max_bytes (1_048_576)
+     #   The maximum number of bytes for the consumer to fetch (1MB by default).
+     # @option options [Integer] :max_wait_ms (100)
+     #   How long to block until the server sends data.
+     #   NOTE: This is only enforced if min_bytes is > 0.
+     #
+     def initialize(gateway, topic, options = {})
+       super gateway.attributes.merge(options)
+       @topic = topic.to_s
+       @gateway = gateway
+       @producer = gateway.producer
+       @consumer = prepare_consumer
+     end
+
+     # Returns a new dataset with updated consumer attributes
+     #
+     # @param [Hash] options The part of the attributes to be updated
+     #
+     # @return [ROM::Kafka::Dataset]
+     #
+     def using(options)
+       self.class.new(gateway, topic, attributes.merge(options))
+     end
+
+     # Returns an enumerator to iterate over tuples fetched from the [#consumer]
+     #
+     # If a message `limit` is set, the iterator stops after reaching it.
+     #
+     # @param [Proc] block
+     #
+     # @yieldparam [Hash] tuple
+     #
+     # @return [Enumerator<Hash{Symbol => String, Integer}>]
+     #
+     def each(&block)
+       return to_enum unless block_given?
+       limit.equal?(0) ? unlimited_each(&block) : limited_each(&block)
+     end
+
+     private
+
+     def prepare_consumer
+       Connection::Consumer.new consumer_options
+     end
+
+     def consumer_options
+       attributes.merge(
+         topic: topic,
+         client_id: gateway.client_id,
+         brokers: gateway.brokers
+       )
+     end
+
+     def unlimited_each
+       enum = consumer.each
+       loop { yield(enum.next) }
+     end
+
+     def limited_each
+       enum = consumer.each
+       limit.times { yield(enum.next) }
+     end
+
+   end # class Dataset
+
+ end # module ROM::Kafka
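
A sketch of how the consumer-side attributes combine, assuming a gateway with a registered "logs" topic and a reachable broker (names are illustrative):

    dataset = gateway.dataset(:logs)

    # #using returns a new dataset; the original attributes are untouched
    recent = dataset.using(offset: 100, limit: 10)

    # Because limit is non-zero here, #each stops after ten messages
    recent.each { |tuple| puts tuple[:value] }

    # Without a block, #each returns an Enumerator
    enum = dataset.each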
data/lib/rom/kafka/gateway.rb
@@ -0,0 +1,165 @@
+ # encoding: utf-8
+
+ module ROM::Kafka
+
+   # Describes the gateway to Kafka
+   #
+   # The gateway has 3 responsibilities:
+   # - registers the datasets describing various topics and partitions
+   # - instantiates the producer connection to Kafka brokers, which doesn't
+   #   depend on specific topic/partition settings
+   # - stores settings for the consumer connections to Kafka, which
+   #   depend on a specific topic/partition/offset
+   #
+   # Every dataset uses the same producer connection (defined by the gateway)
+   # and its own consumer connection. The consumer connection is reloaded
+   # every time the topic, partition or current offset is changed by a relation.
+   #
+   class Gateway < ROM::Gateway
+
+     extend AttributesDSL
+
+     # Attributes used by both producer and consumer
+     attribute :client_id, required: true, &:to_s
+     attribute :brokers
+
+     # Producer-specific attributes
+     attribute :partitioner
+     attribute :compression_codec
+     attribute :metadata_refresh_interval_ms, default: 600_000
+     attribute :max_send_retries, default: 3
+     attribute :retry_backoff_ms, default: 100
+     attribute :required_acks, default: 0
+     attribute :ack_timeout_ms, default: 1_500
+     attribute :socket_timeout_ms, default: 10_000
+
+     # Consumer-specific attributes
+     attribute :min_bytes, default: 1
+     attribute :max_bytes, default: 1_048_576
+     attribute :max_wait_ms, default: 100
+
+     # Initializes the gateway to Kafka broker(s)
+     #
+     # The initializer is attributes-agnostic: it doesn't validate
+     # attributes, but skips unused ones.
+     #
+     # @example Initialize a producer's gateway to Kafka
+     #   gateway = Gateway.new(
+     #     hosts: ["127.0.0.1", "127.0.0.2:9093"],
+     #     port: 9092,
+     #     client_id: :my_user,
+     #     compression_codec: :gzip
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @example Alternative syntax
+     #   gateway = Gateway.new(
+     #     "127.0.0.1:9092",
+     #     "127.0.0.2:9093",
+     #     client_id: :my_user,
+     #     compression_codec: :gzip
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @example Mixed syntax
+     #   gateway = Gateway.new(
+     #     "127.0.0.1:9092",
+     #     hosts: ["127.0.0.2"],
+     #     port: 9093,
+     #     client_id: :my_user,
+     #     min_bytes: 1024 # wait until 1Kb of messages is prepared
+     #   )
+     #   gateway.brokers # => ["127.0.0.1:9092", "127.0.0.2:9093"]
+     #
+     # @param [nil, String, Array<String>] addresses
+     #   The address(es) of broker(s) to connect to (optional).
+     #   Brokers can alternatively be set with the `:hosts` and `:port` options.
+     #
+     # @option options [#to_s] :client_id
+     #   A required unique id used to identify the Kafka client.
+     # @option options [String, Array<String>] :hosts
+     #   A host or list of hosts in the form "host1:port1" or "host1".
+     #   In case of a consumer, only the first host is actually used.
+     # @option options [Integer] :port
+     #   The port shared by all hosts.
+     #
+     # @option options [Proc, nil] :partitioner
+     #   A proc used to provide a partition from a given key.
+     # @option options [:gzip, :snappy, nil] :compression_codec (nil)
+     #   The type of compression to be used.
+     # @option options [Integer] :metadata_refresh_interval_ms (600_000)
+     #   How frequently the topic metadata should be updated (in milliseconds).
+     # @option options [Integer] :max_send_retries (3)
+     #   The number of times to retry sending messages to a leader.
+     # @option options [Integer] :retry_backoff_ms (100)
+     #   The amount of time (in milliseconds) to wait before refreshing
+     #   the metadata after we are unable to send messages.
+     # @option options [Integer] :required_acks (0)
+     #   The number of acks required per request.
+     # @option options [Integer] :ack_timeout_ms (1_500)
+     #   How long the producer waits for acks.
+     # @option options [Integer] :socket_timeout_ms (10_000)
+     #   How long the producer/consumer socket waits for any reply from the server.
+     #
+     # @option options [Integer] :offset
+     #   The initial offset to start fetching from.
+     # @option options [Integer] :min_bytes (1)
+     #   The smallest amount of data the server should send
+     #   (by default, the server sends data as soon as it is ready).
+     # @option options [Integer] :max_bytes (1_048_576)
+     #   The maximum number of bytes for the consumer to fetch (1MB by default).
+     # @option options [Integer] :max_wait_ms (100)
+     #   How long to block until the server sends data.
+     #   NOTE: This is only enforced if min_bytes is > 0.
+     #
+     def initialize(*addresses)
+       options = Hash[addresses.pop]
+       brokers = Brokers.new(addresses, options).to_a
+       super options.merge(brokers: brokers) # prepares #attributes
+
+       @producer = Connection::Producer.new(attributes)
+       @datasets = {}
+     end
+
+     # @!attribute [r] producer
+     #
+     # @return [ROM::Kafka::Connection::Producer]
+     #   The producer's connection to Kafka brokers
+     #
+     attr_reader :producer
+
+     # Returns the registered dataset by topic
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [ROM::Kafka::Dataset]
+     #
+     def [](topic)
+       @datasets[topic.to_sym]
+     end
+
+     # Registers the dataset by topic
+     #
+     # By default the dataset is registered with partition 0 and offset 0.
+     # These settings can be changed from either a relation or a command.
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [ROM::Kafka::Dataset] the registered dataset
+     #
+     def dataset(topic)
+       @datasets[topic.to_sym] ||= Dataset.new(self, topic)
+     end
+
+     # Checks whether a dataset is registered by topic
+     #
+     # @param [#to_sym] topic
+     #
+     # @return [Boolean]
+     #
+     def dataset?(topic)
+       self[topic] ? true : false
+     end
+
+   end # class Gateway
+
+ end # module ROM::Kafka
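
A minimal sketch of the gateway in use, assuming a broker is reachable at the given address (the topic name :logs is illustrative):

    require "rom-kafka"

    gateway = ROM::Kafka::Gateway.new("127.0.0.1:9092", client_id: :my_user)
    gateway.brokers         # => ["127.0.0.1:9092"]

    # Datasets are registered once per topic, then looked up by name
    gateway.dataset(:logs)
    gateway.dataset?(:logs) # => true
    gateway[:logs]          # => #<ROM::Kafka::Dataset ...>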