telekinesis 2.0.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +2 -0
  5. data/README.md +401 -0
  6. data/Rakefile +111 -0
  7. data/ext/.gitignore +3 -0
  8. data/ext/pom.xml +63 -0
  9. data/ext/pom.xml.template +65 -0
  10. data/ext/src/main/java/com/kickstarter/jruby/Telekinesis.java +103 -0
  11. data/lib/telekinesis/aws/client_adapter.rb +61 -0
  12. data/lib/telekinesis/aws/java_client_adapter.rb +72 -0
  13. data/lib/telekinesis/aws/ruby_client_adapter.rb +40 -0
  14. data/lib/telekinesis/aws.rb +9 -0
  15. data/lib/telekinesis/consumer/base_processor.rb +12 -0
  16. data/lib/telekinesis/consumer/block.rb +22 -0
  17. data/lib/telekinesis/consumer/distributed_consumer.rb +114 -0
  18. data/lib/telekinesis/consumer.rb +3 -0
  19. data/lib/telekinesis/java_util.rb +46 -0
  20. data/lib/telekinesis/logging/java_logging.rb +18 -0
  21. data/lib/telekinesis/logging/ruby_logger_handler.rb +54 -0
  22. data/lib/telekinesis/producer/async_producer.rb +157 -0
  23. data/lib/telekinesis/producer/async_producer_worker.rb +110 -0
  24. data/lib/telekinesis/producer/noop_failure_handler.rb +12 -0
  25. data/lib/telekinesis/producer/sync_producer.rb +52 -0
  26. data/lib/telekinesis/producer/warn_failure_handler.rb +25 -0
  27. data/lib/telekinesis/producer.rb +4 -0
  28. data/lib/telekinesis/telekinesis-2.0.0.jar +0 -0
  29. data/lib/telekinesis/version.rb +3 -0
  30. data/lib/telekinesis.rb +14 -0
  31. data/telekinesis.gemspec +21 -0
  32. data/test/aws/test_client_adapter.rb +29 -0
  33. data/test/aws/test_java_client_adapter.rb +72 -0
  34. data/test/producer/test_async_producer.rb +158 -0
  35. data/test/producer/test_async_producer_worker.rb +390 -0
  36. data/test/producer/test_helper.rb +1 -0
  37. data/test/producer/test_sync_producer.rb +144 -0
  38. data/test/test_helper.rb +6 -0
  39. metadata +149 -0
@@ -0,0 +1,103 @@
1
+ package com.kickstarter.jruby;
2
+
3
+ import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer;
4
+ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
5
+ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;
6
+ import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason;
7
+ import com.amazonaws.services.kinesis.model.Record;
8
+
9
+ import java.util.List;
10
+
11
+ /**
12
+ * A shim that makes it possible to use the Kinesis Client Library from JRuby.
13
+ * Without the shim, {@code initialize} method in
14
+ * {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor}
15
+ * conflicts with the special {@code initialize} method in Ruby. The shim
16
+ * interface renames {@code initialize} to {@code init}.
17
+ * <p />
18
+ *
19
+ * For convenience a {@link #newWorker(KinesisClientLibConfiguration, IRecordProcessorFactory)}
20
+ * method is provided, so you can use closure conversion in JRuby to specify an
21
+ * {@link IRecordProcessorFactory}. For example
22
+ *
23
+ * <p />
24
+ *
25
+ * <pre>
26
+ * some_thing = ...
27
+ *
28
+ * com.kickstarter.jruby.Telekinesis.new_worker(my_config) do
29
+ * MyRecordProcessor.new(some_thing, some_other_thing)
30
+ * end
31
+ * </pre>
32
+ */
33
+ public class Telekinesis {
34
+ /**
35
+ * Create a new KCL {@link Worker} that processes records using the given
36
+ * {@link IRecordProcessorFactory}.
37
+ */
38
+ public static Worker newWorker(final KinesisClientLibConfiguration config, final IRecordProcessorFactory factory) {
39
+ return new Worker(new com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory() {
40
+ @Override
41
+ public com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor createProcessor() {
42
+ return new RecordProcessorShim(factory.createProcessor());
43
+ }
44
+ }, config);
45
+ }
46
+
47
+ // ========================================================================
48
+ /**
49
+ * A shim that wraps a {@link IRecordProcessor} so it can get used by the KCL.
50
+ */
51
+ private static class RecordProcessorShim implements com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor {
52
+ private final IRecordProcessor underlying;
53
+
54
+ public RecordProcessorShim(final IRecordProcessor underlying) { this.underlying = underlying; }
55
+
56
+ @Override
57
+ public void initialize(final String shardId) {
58
+ underlying.init(shardId);
59
+ }
60
+
61
+ @Override
62
+ public void processRecords(final List<Record> records, final IRecordProcessorCheckpointer checkpointer) {
63
+ underlying.processRecords(records, checkpointer);
64
+ }
65
+
66
+ @Override
67
+ public void shutdown(final IRecordProcessorCheckpointer checkpointer, final ShutdownReason reason) {
68
+ underlying.shutdown(checkpointer, reason);
69
+ }
70
+ }
71
+
72
+ /**
73
+ * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor}
74
+ * that avoids naming conflicts with reserved words in Ruby.
75
+ */
76
+ public static interface IRecordProcessor {
77
+ /**
78
+ * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#initialize(String)
79
+ */
80
+ void init(final String shardId);
81
+
82
+ /**
83
+ * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#processRecords(List, IRecordProcessorCheckpointer)
84
+ */
85
+ void processRecords(List<Record> records, IRecordProcessorCheckpointer checkpointer);
86
+
87
+ /**
88
+ * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#shutdown(IRecordProcessorCheckpointer, ShutdownReason)
89
+ */
90
+ void shutdown(IRecordProcessorCheckpointer checkpointer, ShutdownReason reason);
91
+ }
92
+
93
+ /**
94
+ * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory}
95
+ * for {@link IRecordProcessor}.
96
+ */
97
+ public static interface IRecordProcessorFactory {
98
+ /**
99
+ * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory#createProcessor()
100
+ */
101
+ IRecordProcessor createProcessor();
102
+ }
103
+ }
@@ -0,0 +1,61 @@
1
module Telekinesis
  module Aws
    # NOTE: wrapping the cause is necessary since JRuby isn't 2.1 compatible (yet)
    class KinesisError < RuntimeError
      # The underlying (platform-specific) exception that triggered this error.
      attr_reader :cause

      def initialize(cause)
        @cause = cause
      end
    end

    # Base class for other ClientAdapters. Client adapters exist to make
    # switching between platforms easy and painless.
    #
    # This base class defines the adapter interface and provides shared
    # convenience behavior; subclasses supply the platform-specific calls.
    class ClientAdapter
      # Build a new client given AWS credentials.
      #
      # Credentials must be supplied as a hash that contains symbolized
      # :access_key_id and :secret_access_key keys.
      def self.build(credentials)
        raise NotImplementedError
      end

      def initialize(client)
        @client = client
      end

      # Make a put_record call to the underlying client. Must return an object
      # that responds to `shard_id` and `sequence_number`.
      def put_record(stream, key, value)
        raise NotImplementedError
      end

      # Make a put_records call to the underlying client. If the request
      # succeeds but some records are rejected, each offending [key, value]
      # pair is returned zipped with its [error_code, error_message] as a
      # four-element array.
      def put_records(stream, items)
        responses = do_put_records(stream, items)
        failed = items.zip(responses).select { |_, result| !result.error_code.nil? }
        failed.map do |(key, value), result|
          [key, value, result.error_code, result.error_message]
        end
      end

      protected

      # Put an enumerable of [key, value] pairs to the given stream. Returns an
      # enumerable of response objects the same size as the given list of items.
      #
      # Response objects must respond to `error_code` and `error_message`. Any
      # response with a nil error_code is considered successful.
      def do_put_records(stream, items)
        raise NotImplementedError
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module Telekinesis
  module Aws
    java_import java.nio.ByteBuffer
    java_import com.amazonaws.AmazonClientException
    # FIX: BasicAWSCredentials and StaticCredentialsProvider were referenced in
    # build_credentials_provider below but never imported, so passing explicit
    # credentials raised NameError. StaticCredentialsProvider lives in
    # com.amazonaws.internal in AWS SDK for Java 1.x — confirm against the
    # bundled SDK version.
    java_import com.amazonaws.auth.BasicAWSCredentials
    java_import com.amazonaws.auth.DefaultAWSCredentialsProviderChain
    java_import com.amazonaws.internal.StaticCredentialsProvider
    java_import com.amazonaws.services.kinesis.AmazonKinesisClient
    java_import com.amazonaws.services.kinesis.model.PutRecordRequest
    java_import com.amazonaws.services.kinesis.model.PutRecordsRequest
    java_import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry

    # A ClientAdapter that wraps the AWS Java SDK.
    #
    # Since the underlying Java client is thread safe, this adapter is thread
    # safe.
    class JavaClientAdapter < ClientAdapter
      # Build a new client adapter. `credentials` is a hash keyed with
      # `:access_key_id` and `:secret_access_key`. If this hash is left blank
      # (the default) the client uses the DefaultAWSCredentialsProviderChain to
      # look for credentials.
      def self.build(credentials = {})
        client = AmazonKinesisClient.new(build_credentials_provider(credentials))
        new(client)
      end

      # Return a Java AWSCredentialsProvider: a static provider when explicit
      # credentials are given, the default chain otherwise.
      def self.build_credentials_provider(credentials)
        if credentials.empty?
          DefaultAWSCredentialsProviderChain.new
        else
          StaticCredentialsProvider.new(
            BasicAWSCredentials.new(
              credentials[:access_key_id],
              credentials[:secret_access_key]
            )
          )
        end
      end

      # Put a single [key, value] record to the stream. Wraps any SDK client
      # exception in KinesisError.
      def put_record(stream, key, value)
        r = PutRecordRequest.new.tap do |request|
          request.stream_name = stream
          request.partition_key = key.to_s
          request.data = ByteBuffer.wrap(value.to_s.to_java_bytes)
        end
        @client.put_record(r)
      rescue AmazonClientException => e
        raise KinesisError.new(e)
      end

      protected

      # See ClientAdapter#do_put_records. Returns the per-record result list
      # from the Java PutRecordsResult.
      def do_put_records(stream, items)
        result = @client.put_records(build_put_records_request(stream, items))
        result.records
      rescue AmazonClientException => e
        raise KinesisError.new(e)
      end

      # Build a Java PutRecordsRequest from an enumerable of [key, value] pairs.
      def build_put_records_request(stream, items)
        entries = items.map do |key, value|
          PutRecordsRequestEntry.new.tap do |entry|
            entry.partition_key = key.to_s
            entry.data = ByteBuffer.wrap(value.to_s.to_java_bytes)
          end
        end
        PutRecordsRequest.new.tap do |request|
          request.stream_name = stream
          request.records = entries
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Telekinesis
  module Aws
    # A ClientAdapter that wraps the ruby aws-sdk gem (version 2).
    #
    # Since the aws-sdk gem does not appear to be thread-safe, this adapter
    # should not be considered thread safe.
    class RubyClientAdapter < ClientAdapter
      # Build a new client adapter. Credentials are passed directly to the
      # constructor for Aws::Kinesis::Client.
      #
      # See: http://docs.aws.amazon.com/sdkforruby/api/Aws/Kinesis/Client.html
      def self.build(credentials)
        new(Aws::Kinesis::Client.new(credentials))
      end

      # Put a single [key, value] record to the stream. Wraps any service
      # error in KinesisError.
      #
      # FIX: aws-sdk v2 names this parameter :stream_name (not :stream);
      # the previous keys raised ArgumentError on every call.
      def put_record(stream, key, value)
        @client.put_record(stream_name: stream, partition_key: key, data: value)
      rescue Aws::Errors::ServiceError => e
        raise KinesisError.new(e)
      end

      protected

      # See ClientAdapter#do_put_records. Flattens per-page record results
      # into a single list.
      def do_put_records(stream, items)
        @client.put_records(build_put_records_request(stream, items)).flat_map do |page|
          page.records
        end
      rescue Aws::Errors::ServiceError => e
        raise KinesisError.new(e)
      end

      # Build the aws-sdk v2 put_records request hash.
      # FIX: the stream key must be :stream_name for aws-sdk v2.
      def build_put_records_request(stream, items)
        {
          stream_name: stream,
          records: items.map { |k, v| {partition_key: k, data: v} }
        }
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
# FIX: dropped the stray ".rb" extension so both requires are consistent
# (and resolve identically under rubygems).
require "telekinesis/aws/client_adapter"
require "telekinesis/aws/java_client_adapter"

module Telekinesis
  module Aws
    # Kinesis caps a single PutRecords request at 500 records.
    KINESIS_MAX_PUT_RECORDS_SIZE = 500
    # Default client implementation; under JRuby this wraps the AWS Java SDK.
    Client = JavaClientAdapter
  end
end
@@ -0,0 +1,12 @@
1
module Telekinesis
  module Consumer
    # A record processor whose required IRecordProcessor methods are all
    # no-ops. Subclass it when a processor only cares about a subset of the
    # lifecycle and doesn't need special behavior on init or shutdown.
    class BaseProcessor
      # Called once when this processor is bound to a shard.
      def init(shard_id); end

      # Called with each batch of records read from the shard.
      def process_records(records, checkpointer); end

      # Called when the lease is lost or the shard is fully consumed.
      def shutdown(checkpointer, reason); end
    end
  end
end
@@ -0,0 +1,22 @@
1
module Telekinesis
  module Consumer
    # A record processor that hands every batch of records to the given
    # block. Handy for defining quick, one-off consumers:
    #
    #   Telekinesis::Consumer::DistributedConsumer.new(stream: 'my-stream', app: 'tail') do
    #     Telekinesis::Consumer::Block.new do |records, checkpointer|
    #       records.each {|r| puts r}
    #     end
    #   end
    class Block < BaseProcessor
      # Raises ArgumentError when no block is supplied.
      def initialize(&block)
        raise ArgumentError, "No block given" unless block_given?
        @handler = block
      end

      # Delegate each batch straight to the stored block.
      def process_records(records, checkpointer)
        @handler.call(records, checkpointer)
      end
    end
  end
end
@@ -0,0 +1,114 @@
1
module Telekinesis
  module Consumer
    java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
    java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration

    class DistributedConsumer
      # Create a new consumer that consumes data from a Kinesis stream.
      # DistributedConsumers use DynamoDB to register as part of the same
      # application and evenly distribute work between them. See the
      # AWS Docs for more information:
      #
      # http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumer-apps-with-kcl.html
      #
      # DistributedConsumers are configured with a hash. The Kinesis `:stream`
      # to consume from is required.
      #
      # DistributedConsumers operate in groups. All consumers with the same
      # `:app` id use dynamo to attempt to distribute work evenly among
      # themselves. The `:worker_id` is used to distinguish individual clients
      # (`:worker_id` defaults to the current hostname. If you plan to run more
      # than one DistributedConsumer in the same `:app` per host, make sure you
      # set this to something unique!).
      #
      # Any other valid KCL Worker `:options` may be passed as a hash.
      #
      # For example, to configure a `tail` app on `some-stream` and use the
      # default `:worker_id`, you might pass the following configuration to your
      # DistributedConsumer.
      #
      #   config = {
      #     app: 'tail',
      #     stream: 'some-stream',
      #     options: {initial_position_in_stream: 'TRIM_HORIZON'}
      #   }
      #
      # To actually process the stream, a DistributedConsumer creates record
      # processors — objects that implement the KCL RecordProcessor interface:
      # `init`, `process_records`, and `shutdown` methods.
      #
      # http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java
      #
      # To specify which record processor to create, pass a block to your
      # distributed consumer that returns a new record processor. This block
      # may (nay, WILL) be called from a background thread so make sure that
      # it's thread-safe.
      #
      # Telekinesis provides a BaseProcessor that implements no-op versions
      # of all of the required methods to make writing quick processors easier
      # and a Block processor that executes the specified block every time
      # `process_records` is called.
      #
      # To write a stream tailer, you might use Block as follows:
      #
      #   Telekinesis::Consumer::DistributedConsumer.new(config) do
      #     Telekinesis::Consumer::Block do |records, _|
      #       records.each {|r| puts r}
      #     end
      #   end
      #
      def initialize(config, &block)
        raise ArgumentError, "No block given!" unless block_given?
        kcl_config = self.class.build_config(config)
        @under = com.kickstarter.jruby.Telekinesis.new_worker(kcl_config, &block)
      end

      # Return the underlying KCL worker. It's a java.lang.Runnable.
      def as_runnable
        @under
      end

      # Start the KCL worker. If background is set to `true`, the worker is
      # started in its own JRuby Thread and the Thread is returned. Otherwise,
      # starts in the current thread and returns nil.
      def run(background = false)
        background ? Thread.new { @under.run } : @under.run
      end

      protected

      # Translate the config hash into a KinesisClientLibConfiguration.
      # NOTE(review): `protected` has no effect on singleton methods, so this
      # is effectively public; kept in place to preserve the original layout.
      def self.build_config(config)
        creds_hash = config.fetch(:credentials, {})
        credentials_provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(creds_hash)

        # App and Stream are mandatory.
        app, stream = [:app, :stream].map do |k|
          raise ArgumentError, "#{k} is required" unless config.include?(k)
          config[k]
        end
        # Use this host as the worker_id by default.
        worker_id = config.fetch(:worker_id, `hostname`.chomp)

        kcl_config = KinesisClientLibConfiguration.new(app, stream, credentials_provider, worker_id)
        config.fetch(:options, {}).each do |k, v|
          # initial_position_in_stream is the only option whose value needs
          # converting before it's handed to the KCL config.
          if k.to_s == 'initial_position_in_stream'
            kcl_config.with_initial_position_in_stream(InitialPositionInStream.value_of(v))
          else
            setter = "with_#{k}".to_sym
            kcl_config.send(setter, v) if kcl_config.respond_to?(setter)
          end
        end
        kcl_config
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Load the consumer API. Order matters for the latter two: block.rb
# subclasses BaseProcessor at load time, so base_processor must come first.
require "telekinesis/consumer/distributed_consumer"
require "telekinesis/consumer/base_processor"
require "telekinesis/consumer/block"
@@ -0,0 +1,46 @@
1
module Telekinesis
  module JavaUtil
    java_import java.util.concurrent.locks.ReentrantReadWriteLock

    # Sugar around java.util.concurrent.ReentrantReadWriteLock so that it's
    # easy to use with blocks.
    #
    # e.g.
    #
    #   lock = ReadWriteLock.new
    #   some_value = 12345
    #
    #   # In a reader thread
    #   lock.read_lock do
    #     # Read some data! This won't block any other calls to read_lock, but will
    #     # block if another thread is in a section guarded by write_lock.
    #   end
    #
    #   # In a writer thread
    #   lock.write_lock do
    #     # Write some data! This is exclusive with *any* other code guarded by
    #     # either read_lock or write_lock.
    #   end
    class ReadWriteLock
      def initialize(fair = false)
        lock = ReentrantReadWriteLock.new(fair)
        @read = lock.read_lock
        @write = lock.write_lock
      end

      # Run the block while holding the read lock.
      #
      # FIX: the unlock used to sit in an ensure that also covered
      # lock_interruptibly, so an interrupt raised during acquisition called
      # unlock without the lock held — an IllegalMonitorStateException that
      # masked the original interrupt. Only unlock after a successful acquire.
      def read_lock
        @read.lock_interruptibly
        begin
          yield
        ensure
          @read.unlock
        end
      end

      # Run the block while holding the write lock. Same acquire/release
      # discipline as read_lock.
      def write_lock
        @write.lock_interruptibly
        begin
          yield
        ensure
          @write.unlock
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
require "logger"
require "telekinesis/logging/ruby_logger_handler"

module Telekinesis
  module Logging
    java_import java.util.logging.Logger
    java_import java.util.logging.LogManager

    # Route all java.util.logging output through the given Ruby logger by
    # resetting j.u.l. and installing a single delegating handler on the
    # root ("") logger.
    def self.capture_java_logging(logger)
      LogManager.log_manager.reset
      root_logger = Logger.get_logger("")
      root_logger.add_handler(RubyLoggerHandler.create(logger))
    end

    # Silence java.util.logging entirely by resetting its configuration.
    def self.disable_java_logging
      LogManager.log_manager.reset
    end
  end
end
@@ -0,0 +1,54 @@
1
module Telekinesis
  module Logging
    java_import java.util.logging.Level
    java_import java.util.logging.Handler

    # A java.util.logging Handler that delegates to a Ruby logger. The name of
    # the j.u.l. logger is passed as the progname argument to Logger#add.
    #
    # The mapping between j.u.l. severity levels and Ruby Logger levels is
    # only approximate.
    class RubyLoggerHandler < Handler
      # NOTE: Since this class overrides a Java class, instantiation goes
      # through the Java constructor, so the delegate logger has to be
      # injected after the fact. (Overriding in JRuby is weird.) Use this
      # factory to build a fully wired-up handler.
      def self.create(logger)
        handler = new
        handler.set_logger(logger)
        handler
      end

      # j.u.l. Level => Ruby Logger severity.
      # NOTE: Ruby's FATAL has no j.u.l. equivalent, and CONFIG/FINE/FINER/
      # FINEST collapse into INFO/DEBUG.
      SEVERITY = {
        Level::SEVERE  => Logger::ERROR,
        Level::WARNING => Logger::WARN,
        Level::INFO    => Logger::INFO,
        Level::CONFIG  => Logger::INFO,
        Level::FINE    => Logger::DEBUG,
        Level::FINER   => Logger::DEBUG,
        Level::FINEST  => Logger::DEBUG,
      }

      # Inject the delegate Ruby logger (see self.create).
      def set_logger(l)
        @logger = l
      end

      # Close the underlying Ruby logger's log device.
      def close
        @logger.close
      end

      # Ruby's logger has no flush method.
      def flush; end

      # Forward a j.u.l. LogRecord to the Ruby logger, appending any attached
      # throwable to the message.
      def publish(log_record)
        thrown = log_record.thrown
        message = thrown.nil? ? log_record.message : "#{log_record.message}: #{thrown}"
        @logger.add(SEVERITY[log_record.level], message, log_record.logger_name)
      end
    end
  end
end