turbine 1.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +15 -0
- data/.travis.yml +13 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +15 -0
- data/Guardfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +109 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +6 -0
- data/lib/turbine/batch.rb +28 -0
- data/lib/turbine/consumer/kafka.rb +31 -0
- data/lib/turbine/consumer.rb +5 -0
- data/lib/turbine/processor.rb +102 -0
- data/lib/turbine/rspec/kafka_helper.rb +85 -0
- data/lib/turbine/version.rb +4 -0
- data/lib/turbine.rb +11 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/turbine/batch_spec.rb +35 -0
- data/spec/turbine/consumer/kafka_spec.rb +54 -0
- data/spec/turbine/processor_spec.rb +97 -0
- data/spec/turbine_spec.rb +7 -0
- data/tasks/kafka.rake +55 -0
- data/tasks/rspec.rake +3 -0
- data/tasks/rubocop.rake +2 -0
- data/tasks/zookeeper.rake +63 -0
- data/turbine.gemspec +30 -0
- data/turbine.png +0 -0
- metadata +172 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 06c2b81075e5071fcc7ca1eaaab8c1e7edaaf787
|
4
|
+
data.tar.gz: 34a741dd15ec2e4a24d6e9b89a7710d222c4b758
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 24e0625293f5306c2f22615e5d6d2822536427bda3c811130de5fff9f60faf05b2348f6554fdfe7db58c3b163bba428cc4e0e2383f854a471d0d61fe12075f98
|
7
|
+
data.tar.gz: cbec4a21aa14a47fd9878fbe48e12d60aa20a910828532a37f70b0ee69bf9fc565d2fe8d2ab56827dcb6746812d6f10e9b33eb6143646aad9b9c08e6093251cf
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
|
4
|
+
|
5
|
+
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
|
6
|
+
|
7
|
+
Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
|
8
|
+
|
9
|
+
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
|
10
|
+
|
11
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
|
12
|
+
|
13
|
+
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
|
data/Gemfile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
source "https://rubygems.org"
|
2
|
+
|
3
|
+
gem "colorize"
|
4
|
+
gem "poseidon_cluster", github: "bsm/poseidon_cluster"
|
5
|
+
|
6
|
+
group :development, :test do
|
7
|
+
gem "guard-rspec"
|
8
|
+
gem "rubocop"
|
9
|
+
end
|
10
|
+
|
11
|
+
group :test do
|
12
|
+
gem "coveralls", require: false
|
13
|
+
end
|
14
|
+
|
15
|
+
gemspec
|
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Tony Arcieri
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
![Turbine](https://raw.githubusercontent.com/tarcieri/turbine/master/turbine.png)
|
2
|
+
=======
|
3
|
+
[![Build Status](https://travis-ci.org/tarcieri/turbine.svg)](https://travis-ci.org/tarcieri/turbine)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/tarcieri/turbine/badges/gpa.svg)](https://codeclimate.com/github/tarcieri/turbine)
|
5
|
+
[![Coverage Status](https://coveralls.io/repos/tarcieri/turbine/badge.svg)](https://coveralls.io/r/tarcieri/turbine)
|
6
|
+
|
7
|
+
Fault-tolerant multithreaded stream processing for Ruby.
|
8
|
+
|
9
|
+
Turbine is a performance-oriented stream processing library built on Zookeeper.
|
10
|
+
It presently supports Kafka as a message queue, but is designed to be pluggable
|
11
|
+
in order to potentially support other message queues in the future.
|
12
|
+
|
13
|
+
Turbine is not a job queue and is missing most of the features you'd expect
|
14
|
+
from a job queue by design. Turbine is designed to be small, simple, and fast.
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
Add these lines to your application's Gemfile:
|
19
|
+
|
20
|
+
**NOTE:** Turbine relies on an unreleased version (0.3.0) of poseidon_cluster,
|
21
|
+
so you will also need to use the version off of GitHub for now.
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem "turbine"
|
25
|
+
gem "poseidon_cluster", github: "bsm/poseidon_cluster"
|
26
|
+
```
|
27
|
+
|
28
|
+
And then execute:
|
29
|
+
|
30
|
+
$ bundle
|
31
|
+
|
32
|
+
Or install it yourself as:
|
33
|
+
|
34
|
+
$ gem install turbine
|
35
|
+
|
36
|
+
## Usage
|
37
|
+
|
38
|
+
Turbine presently supports stream processing from the Kafka message queue
|
39
|
+
using the [poseidon_cluster](https://github.com/bsm/poseidon_cluster) gem,
|
40
|
+
which implements self-rebalancing Consumer Groups.
|
41
|
+
|
42
|
+
To create a new Kafka consumer for a topic, do the following:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
require "turbine"
|
46
|
+
require "turbine/consumer/kafka"
|
47
|
+
|
48
|
+
consumer = Turbine::Consumer::Kafka.new(
|
49
|
+
"my-group", # Group name
|
50
|
+
["kafka1.host:9092", "kafka2.host:9092"], # Kafka brokers
|
51
|
+
["zk1.host:2181", "zk2.host:2181"], # Zookeeper hosts
|
52
|
+
"my-topic" # Topic name
|
53
|
+
)
|
54
|
+
|
55
|
+
processor = Turbine::Processor.new(min_threads: 5, max_threads: 5, max_queue: 1000)
|
56
|
+
|
57
|
+
processor.process(consumer) do |msg|
|
58
|
+
...
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
## Error handling
|
63
|
+
|
64
|
+
By default, Turbine prints exceptions that occur during message processing to STDERR.
|
65
|
+
Chances are, you'd probably rather log them to an exception logging service.
|
66
|
+
|
67
|
+
After creating a processor object, you can configure a custom exception handler so
|
68
|
+
you can log these exceptions to a more appropriate place than STDERR:
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
processor = Turbine::Processor.new(min_threads: 5, max_threads: 5, max_queue: 1000)
|
72
|
+
|
73
|
+
processor.error_handler do |ex, _msg|
|
74
|
+
MyBugHandler.notify_or_ignore(ex)
|
75
|
+
end
|
76
|
+
|
77
|
+
processor.process(consumer) do |msg|
|
78
|
+
...
|
79
|
+
end
|
80
|
+
```
|
81
|
+
|
82
|
+
## Semantics (PLEASE READ)
|
83
|
+
|
84
|
+
Turbine automatically reschedules processing of messages in the stream in the event of faults or rebalancing of resources. Because of this, the same message may be received multiple times. Stream processing jobs written in Turbine MUST account for this.
|
85
|
+
|
86
|
+
An example of where things could go wrong is a "counter" job. Imagine we look for a particular event and increment a counter in statsd/memcached/redis. This will not give accurate numbers, because message replays will spuriously increment the counter.
|
87
|
+
|
88
|
+
The contract of Turbine is as follows:
|
89
|
+
|
90
|
+
* Turbine messages are guaranteed to be delivered AT LEAST once but Turbine MAY replay the same message many times
|
91
|
+
* Because of this, stream processing jobs written in Turbine MUST be idempotent (i.e. repeat processing of the same message is gracefully tolerated)
|
92
|
+
|
93
|
+
## Development
|
94
|
+
|
95
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
96
|
+
|
97
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
98
|
+
|
99
|
+
## Contributing
|
100
|
+
|
101
|
+
* Fork this repository on github
|
102
|
+
* Make your changes and send us a pull request
|
103
|
+
* If we like them we'll merge them
|
104
|
+
* If we've accepted a patch, feel free to ask for commit access
|
105
|
+
|
106
|
+
## License
|
107
|
+
|
108
|
+
Copyright (c) 2015 Tony Arcieri. Distributed under the MIT License. See
|
109
|
+
LICENSE.txt for further details.
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "turbine"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
module Turbine
  # An immutable group of messages fetched from a single partition,
  # carrying an atomic "completed" flag used to coordinate offset commits.
  class Batch
    attr_reader :messages, :partition

    # messages  - collection of messages (frozen on construction so the
    #             batch can be shared safely across worker threads)
    # partition - the partition these messages were fetched from
    def initialize(messages, partition)
      @messages  = messages.freeze
      @partition = partition
      @completed = Concurrent::AtomicBoolean.new
    end

    # Mark every message in this batch as processed
    def complete
      @completed.value = true
    end

    # Has this batch been fully processed?
    def completed?
      @completed.value
    end

    # Fetch the message at the given index (nil when out of range)
    def [](index)
      @messages.at(index)
    end

    # Number of messages in the batch
    def size
      @messages.size
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "poseidon_cluster"

module Turbine
  module Consumer
    # Turbine consumer for the Kafka message queue, backed by a
    # Poseidon consumer group (self-rebalancing via Zookeeper).
    class Kafka
      # All arguments are forwarded verbatim to Poseidon::ConsumerGroup.new
      # (group name, broker list, zookeeper list, topic, ...)
      def initialize(*args)
        @consumer = Poseidon::ConsumerGroup.new(*args)
      end

      # Fetch the next batch of messages WITHOUT committing offsets.
      # Returns a Turbine::Batch, or nil if nothing was yielded.
      def fetch
        result = nil

        @consumer.fetch commit: false do |partition, messages|
          result = Batch.new(messages, partition)
        end

        result
      end

      # Commit the offset just past the last message of the batch.
      # Empty batches are skipped (nothing to commit).
      def commit(batch)
        return if batch.messages.empty?
        @consumer.commit batch.partition, batch.messages.last.offset + 1
      end

      # Close the underlying consumer group connection
      def close
        @consumer.close
      end
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module Turbine
  # Multithreaded message processor: pulls batches from a consumer,
  # dispatches them to a thread pool, and commits completed batches
  # back to the consumer in partition order.
  class Processor
    # How long to sleep when busy waiting for the queue to empty
    BUSY_WAIT_INTERVAL = 0.0001

    # args are forwarded to Concurrent::ThreadPoolExecutor
    # (e.g. min_threads:, max_threads:, max_queue:)
    def initialize(*args)
      @running         = Concurrent::AtomicBoolean.new
      @pool            = Concurrent::ThreadPoolExecutor.new(*args)
      @completed_count = Concurrent::AtomicFixnum.new
      @pending         = [] # indexed by partition: FIFO queues of in-flight batches

      # Default error handler prints to STDERR; override via #error_handler
      @error_handler = proc do |ex|
        STDERR.puts "*** Error processing message: #{ex.class} #{ex}\n#{ex.backtrace.join("\n")}"
      end
    end

    # Main event loop: fetch batches from the consumer and run the given
    # block over each message until stopped or the consumer returns nil.
    def process(consumer, &block)
      fail ArgumentError, "no block given" unless block
      processor_method = method(:process_batch)

      @running.value = true
      while @running.value && (batch = consumer.fetch)
        enqueue_batch(batch)

        begin
          @pool.post(batch, block, &processor_method)
        rescue Concurrent::RejectedExecutionError
          # Pool queue is full: let workers catch up, then retry the post
          busy_wait(consumer)
          retry
        end

        commit_completions(consumer)
      end
    end

    # Ask the event loop to stop after its current iteration
    def stop
      @running.value = false
    end

    # Stop, then wait (up to timeout, or indefinitely when nil) for
    # in-flight work to finish
    def drain(timeout = nil)
      stop
      @pool.shutdown
      @pool.wait_for_termination(timeout)
    end

    # Total number of messages processed so far
    def completed_count
      @completed_count.value
    end

    # Is the event loop currently active?
    def running?
      @running.value
    end

    # Register a custom handler invoked with (exception, message) whenever
    # the processing block raises
    def error_handler(&block)
      @error_handler = block
    end

    private

    # Track a batch as in-flight on its partition's FIFO queue
    def enqueue_batch(batch)
      queue = @pending[batch.partition] ||= []
      queue << batch
    end

    # For each partition, pop completed batches off the front of the queue
    # and commit only the newest one — committing an offset implies all
    # earlier offsets on that partition.
    def commit_completions(consumer)
      @pending.each do |queue|
        next unless queue

        newest_done = nil
        while (head = queue.first) && head.completed?
          newest_done = queue.shift
        end

        consumer.commit(newest_done) if newest_done
      end
    end

    # Worker body: run the user block over every message, routing
    # exceptions to the error handler, then mark the batch complete.
    # Note: messages that raise still count toward completed_count.
    def process_batch(batch, block)
      batch.size.times do |index|
        msg = batch[index]

        begin
          block.call(msg)
        rescue => ex
          @error_handler.call(ex, msg)
        end

        @completed_count.increment
      end

      batch.complete
    end

    # We exceeded the pool's queue, so commit what we can and busy-wait
    # TODO: more intelligent busy-waiting strategy
    def busy_wait(consumer)
      commit_completions(consumer)
      sleep BUSY_WAIT_INTERVAL
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require "open3"
require "poseidon"

# Helper functions for integration testing with Kafka
module KafkaHelper
  extend self

  ZOOKEEPER_ADDR = "localhost:2181"
  KAFKA_ADDR = "localhost:9092"

  # Delete the given Kafka topic (best effort — failures are ignored)
  def delete_topic(topic)
    log "*** Deleting Kafka topic: #{topic}"

    topic_command :delete, topic: topic
  end

  # Create a single-partition, unreplicated topic, raising on failure
  def create_topic(topic)
    log "*** Creating Kafka topic: #{topic}"

    required_topic_command :create,
                           "replication-factor" => 1,
                           "partitions" => 1,
                           "topic" => topic
  end

  # All topic names known to the broker
  def list_topics
    topic_command(:list).split("\n")
  end

  def topic_exists?(topic)
    list_topics.include?(topic)
  end

  # Publish n sequentially-numbered messages to the topic, in chunks
  # of 1000 per producer call
  def fill_topic(topic, n = 100_000)
    fail ArgumentError, "min messages is 1000" if n < 1000

    producer = Poseidon::Producer.new([KAFKA_ADDR], "my_test_producer", type: :sync)

    log "*** Filling topic with #{n} messages: #{topic}"

    (n / 1000).times do |chunk|
      messages = Array.new(1000) do |offset|
        Poseidon::MessageToSend.new(topic, (chunk * 1000 + offset).to_s)
      end

      producer.send_messages(messages)
    end
  ensure
    producer.close if producer
  end

  private

  # Root of the local Kafka installation (unpacked by the rake tasks)
  def kafka_path
    File.expand_path("../../../../kafka", __FILE__)
  end

  def kafka_topics_bin_path
    "#{kafka_path}/bin/kafka-topics.sh"
  end

  # Render an argument hash (plus the default zookeeper address) as CLI flags
  def kafka_args(args = {})
    { zookeeper: ZOOKEEPER_ADDR }.merge(args).map { |k, v| "--#{k} #{v}" }.join(" ")
  end

  # Run kafka-topics.sh; returns captured stdout on success, nil on failure
  def topic_command(command, args = {})
    cmd = "#{kafka_topics_bin_path} --#{command} #{kafka_args(args)}"
    stdout_str, _stderr_str, status = Open3.capture3(cmd)
    return unless status.success?
    stdout_str
  end

  # Like topic_command, but raises when the command fails
  def required_topic_command(command, args = {})
    result = topic_command(command, args)
    fail "Kafka command failed!" unless result
    true
  end

  def log(message)
    STDERR.puts(message)
  end
end
|
data/lib/turbine.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
require "spec_helper"

RSpec.describe Turbine::Batch do
  let(:example_batch_size) { 13 }
  let(:example_elements)   { (0...13).to_a }
  let(:example_partition)  { 0 }
  let(:example_batch)      { described_class.new(example_elements, example_partition) }

  it "creates batches from arrays" do
    example_elements.each_index do |index|
      expect(example_batch[index]).to eq example_elements[index]
    end
  end

  it "knows its size" do
    expect(example_batch.size).to eq example_batch_size
  end

  it "knows its partition" do
    expect(example_batch.partition).to eq example_partition
  end

  it "begins incomplete" do
    expect(example_batch).not_to be_completed
  end

  it "can be completed" do
    example_batch.complete
    expect(example_batch).to be_completed
  end

  it "inspects" do
    expect(example_batch.inspect).to include described_class.to_s
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require "spec_helper"
require "turbine/consumer/kafka"
require "turbine/rspec/kafka_helper"
require "benchmark"

RSpec.describe Turbine::Consumer::Kafka do
  MESSAGE_COUNT = 100_000

  let(:example_topic) { @example_topic }

  # Yields a consumer connected to the local test brokers and
  # always closes it afterwards
  def with_consumer
    consumer = described_class.new(
      "my-consumer-group",
      ["localhost:9092"],
      ["localhost:2181"],
      example_topic
    )

    begin
      yield consumer
    ensure
      consumer.close
    end
  end

  before :all do
    timestamp = Time.now.strftime("%Y%m%d%H%M%S%L")

    # Fix: topic prefix was misspelled "turbike"
    @example_topic = "turbine-kafka-specs-#{timestamp}"
    KafkaHelper.create_topic(@example_topic)
    KafkaHelper.fill_topic(@example_topic, MESSAGE_COUNT)
  end

  after :all do
    KafkaHelper.delete_topic(@example_topic)
  end

  it "fetches batches of messages" do
    count = 0
    with_consumer do |consumer|
      rt = Benchmark.realtime do
        while count < MESSAGE_COUNT
          messages = consumer.fetch
          count += messages.size
        end
      end

      # Fix: the rate string previously carried a stray ")" which doubled
      # the closing paren in the printed performance message
      rate = "#{format('%.2f', count / rt)} msg/sec"
      STDERR.puts("*** Performance: #{count} messages in #{format('%.2f', rt)} seconds (#{rate})")
    end

    expect(count).to eq MESSAGE_COUNT
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
require "spec_helper"

RSpec.describe Turbine::Processor do
  MIN_THREAD_COUNT = 2
  MAX_THREAD_COUNT = 16
  QUEUE_SIZE = 100

  let(:example_batch_size)    { 100 }
  let(:example_elements)      { (0...example_batch_size).to_a }
  let(:example_partition)     { 0 }
  let(:example_batch_count)   { 1000 }
  let(:example_message_count) { example_batch_size * example_batch_count }

  let(:example_batches) do
    Array.new(example_batch_count) do
      Turbine::Batch.new(example_elements, example_partition)
    end
  end

  # Consumer double that yields every example batch once, then nil
  let(:mock_consumer) do
    double(:consumer).tap do |consumer|
      allow(consumer).to receive(:fetch).and_return(*example_batches, nil)
      allow(consumer).to receive(:commit)
    end
  end

  let(:example_processor) do
    # NOTE(review): min_threads is set to MAX_THREAD_COUNT here (a fixed-size
    # pool) — confirm this is intentional rather than MIN_THREAD_COUNT
    described_class.new(
      min_threads: MAX_THREAD_COUNT,
      max_threads: MAX_THREAD_COUNT,
      max_queue: QUEUE_SIZE
    )
  end

  it "supports stopping the event loop" do
    example_processor.stop
    expect(example_processor.running?).to eq false
  end

  it "counts the number of messages processed" do
    example_processor.process(mock_consumer) do |_msg|
      # noop!
    end

    example_processor.drain

    expect(example_processor.completed_count).to eq example_message_count
  end

  it "tolerates processing errors gracefully" do
    # Check the default handler is printing to STDERR
    expect(STDERR).to receive(:puts).exactly(example_message_count).times

    example_processor.process(mock_consumer) do |_msg|
      fail "uhoh!"
    end

    example_processor.drain
  end

  it "supports a custom error handler" do
    handler_called = false

    example_processor.error_handler do |_ex, _msg|
      handler_called = true
    end

    example_processor.process(mock_consumer) do |_msg|
      fail "uhoh!"
    end

    example_processor.drain
    expect(handler_called).to eq true
  end

  context "message processing" do
    MIN_THREAD_COUNT.upto(MAX_THREAD_COUNT) do |thread_count|
      it "processes batches of messages with #{thread_count} threads" do
        processor = described_class.new(
          min_threads: thread_count,
          max_threads: thread_count,
          max_queue: QUEUE_SIZE
        )

        processor.process(mock_consumer) do |_msg|
          # noop!
        end

        processor.drain

        example_batches.each do |example_batch|
          expect(example_batch).to be_completed
        end
      end
    end
  end
end
|
data/tasks/kafka.rake
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require "rake/clean"
require "colorize"
require "socket"
require "timeout"

KAFKA_PORT = 9092
START_TIMEOUT = 5

namespace :kafka do
  KAFKA_VERSION = "0.8.2.1"
  KAFKA_TARBALL = "kafka_2.10-#{KAFKA_VERSION}.tgz"

  task download: "tmp/#{KAFKA_TARBALL}"
  directory "tmp"

  # Download the Kafka release tarball into tmp/
  file "tmp/#{KAFKA_TARBALL}" => "tmp" do
    puts "#{'***'.blue} #{'Downloading Kafka'.light_white}"
    # Reuse KAFKA_TARBALL instead of duplicating the filename literal
    url = "https://www.apache.org/dist/kafka/#{KAFKA_VERSION}/#{KAFKA_TARBALL}"
    sh "curl #{url} -o tmp/#{KAFKA_TARBALL}"
  end

  # Unpack the tarball into ./kafka, replacing any previous install
  task install: :download do
    puts "#{'***'.blue} #{'Unpacking Kafka'.light_white}"

    rm_rf "kafka" if File.exist? "kafka"
    sh "tar -zxf tmp/#{KAFKA_TARBALL}"
    mv "kafka_2.10-#{KAFKA_VERSION}", "kafka"
  end

  # Start Kafka (after Zookeeper) and block until the broker port
  # accepts connections or START_TIMEOUT elapses
  task start: %w(kafka zookeeper:start) do
    puts "#{'***'.blue} #{'Starting Kafka'.light_white}"
    sh "cd kafka && bin/kafka-server-start.sh config/server.properties &"

    Timeout.timeout(START_TIMEOUT) do
      begin
        # Fix: probe KAFKA_PORT instead of a duplicated magic 9092
        socket = TCPSocket.open("localhost", KAFKA_PORT)
      rescue Errno::ECONNREFUSED
        sleep 0.01
        retry
      end

      socket.close
    end

    # Give Kafka some time to finish printing startup messages
    sleep 0.5
    puts "#{'***'.blue} #{'Kafka started!'.light_white}"
  end
end

file "kafka" do
  Rake::Task["kafka:install"].invoke
end

CLEAN.include "tmp", "kafka"
|
data/tasks/rspec.rake
ADDED
data/tasks/rubocop.rake
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "rake/clean"
require "colorize"

# Render zoo.cfg contents for a Zookeeper install whose data directory
# lives at the given path
def zookeeper_config(data)
  <<-CONFIG
# Zookeeper configuration

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=#{data}
# the port at which the clients will connect
clientPort=2181
  CONFIG
end

namespace :zookeeper do
  ZK_VERSION = "3.4.6"
  ZK_TARBALL = "zookeeper-#{ZK_VERSION}.tar.gz"

  task download: "tmp/#{ZK_TARBALL}"
  directory "tmp"

  # Download the Zookeeper release tarball into tmp/
  file "tmp/#{ZK_TARBALL}" => "tmp" do
    puts "#{'***'.blue} #{'Downloading Zookeeper'.light_white}"
    url = "https://archive.apache.org/dist/zookeeper/zookeeper-#{ZK_VERSION}/#{ZK_TARBALL}"
    sh "curl #{url} -o tmp/#{ZK_TARBALL}"
  end

  # Unpack into ./zookeeper and write a base configuration
  task install: :download do
    puts "#{'***'.blue} #{'Unpacking Zookeeper'.light_white}"

    rm_rf "zookeeper" if File.exist? "zookeeper"
    sh "tar -zxf tmp/#{ZK_TARBALL}"
    mv "zookeeper-#{ZK_VERSION}", "zookeeper"
    home = File.expand_path("../../zookeeper", __FILE__)

    # Create base configuration
    data = File.join(home, "data")
    mkdir_p data
    config = File.join(home, "conf", "zoo.cfg")
    rm_r File.join(home, "conf", "zoo_sample.cfg")

    File.open(config, "w") { |file| file << zookeeper_config(data) }
  end

  # Launch the Zookeeper server via its bundled start script
  task start: :zookeeper do
    puts "#{'***'.blue} #{'Starting Zookeeper'.light_white}"
    sh "cd zookeeper && bin/zkServer.sh start"
  end
end

file "zookeeper" do
  Rake::Task["zookeeper:install"].invoke
end

CLEAN.include "tmp", "zookeeper"
|
data/turbine.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "turbine/version"

Gem::Specification.new do |spec|
  spec.name    = "turbine"
  spec.version = Turbine::VERSION
  spec.authors = ["Tony Arcieri"]
  spec.email   = ["bascule@gmail.com"]

  spec.summary     = "Fault-tolerant multithreaded stream processing for Ruby"
  spec.description = "Turbine is a performance-oriented stream processor built on Zookeeper"
  spec.homepage    = "https://github.com/tarcieri/turbine"
  spec.license     = "MIT"

  # Package everything tracked by git; executables live under exe/
  spec.files         = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "zk"
  spec.add_runtime_dependency "poseidon", ">= 0.0.5"
  spec.add_runtime_dependency "poseidon_cluster", ">= 0.3.0"
  spec.add_runtime_dependency "concurrent-ruby"

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.2"
end
|
data/turbine.png
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: turbine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0.pre
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tony Arcieri
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: zk
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: poseidon
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.0.5
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.0.5
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: poseidon_cluster
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.3.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.3.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: concurrent-ruby
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.9'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.9'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '10.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '10.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '3.2'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '3.2'
|
111
|
+
description: Turbine is a performance-oriented stream processor built on Zookeeper
|
112
|
+
email:
|
113
|
+
- bascule@gmail.com
|
114
|
+
executables: []
|
115
|
+
extensions: []
|
116
|
+
extra_rdoc_files: []
|
117
|
+
files:
|
118
|
+
- ".gitignore"
|
119
|
+
- ".rspec"
|
120
|
+
- ".rubocop.yml"
|
121
|
+
- ".travis.yml"
|
122
|
+
- CODE_OF_CONDUCT.md
|
123
|
+
- Gemfile
|
124
|
+
- Guardfile
|
125
|
+
- LICENSE.txt
|
126
|
+
- README.md
|
127
|
+
- Rakefile
|
128
|
+
- bin/console
|
129
|
+
- bin/setup
|
130
|
+
- lib/turbine.rb
|
131
|
+
- lib/turbine/batch.rb
|
132
|
+
- lib/turbine/consumer.rb
|
133
|
+
- lib/turbine/consumer/kafka.rb
|
134
|
+
- lib/turbine/processor.rb
|
135
|
+
- lib/turbine/rspec/kafka_helper.rb
|
136
|
+
- lib/turbine/version.rb
|
137
|
+
- spec/spec_helper.rb
|
138
|
+
- spec/turbine/batch_spec.rb
|
139
|
+
- spec/turbine/consumer/kafka_spec.rb
|
140
|
+
- spec/turbine/processor_spec.rb
|
141
|
+
- spec/turbine_spec.rb
|
142
|
+
- tasks/kafka.rake
|
143
|
+
- tasks/rspec.rake
|
144
|
+
- tasks/rubocop.rake
|
145
|
+
- tasks/zookeeper.rake
|
146
|
+
- turbine.gemspec
|
147
|
+
- turbine.png
|
148
|
+
homepage: https://github.com/tarcieri/turbine
|
149
|
+
licenses:
|
150
|
+
- MIT
|
151
|
+
metadata: {}
|
152
|
+
post_install_message:
|
153
|
+
rdoc_options: []
|
154
|
+
require_paths:
|
155
|
+
- lib
|
156
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - ">="
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: '0'
|
161
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ">"
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: 1.3.1
|
166
|
+
requirements: []
|
167
|
+
rubyforge_project:
|
168
|
+
rubygems_version: 2.4.6
|
169
|
+
signing_key:
|
170
|
+
specification_version: 4
|
171
|
+
summary: Fault-tolerant multithreaded stream processing for Ruby
|
172
|
+
test_files: []
|