telekinesis 2.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +2 -0
  5. data/README.md +401 -0
  6. data/Rakefile +111 -0
  7. data/ext/.gitignore +3 -0
  8. data/ext/pom.xml +63 -0
  9. data/ext/pom.xml.template +65 -0
  10. data/ext/src/main/java/com/kickstarter/jruby/Telekinesis.java +103 -0
  11. data/lib/telekinesis/aws/client_adapter.rb +61 -0
  12. data/lib/telekinesis/aws/java_client_adapter.rb +72 -0
  13. data/lib/telekinesis/aws/ruby_client_adapter.rb +40 -0
  14. data/lib/telekinesis/aws.rb +9 -0
  15. data/lib/telekinesis/consumer/base_processor.rb +12 -0
  16. data/lib/telekinesis/consumer/block.rb +22 -0
  17. data/lib/telekinesis/consumer/distributed_consumer.rb +114 -0
  18. data/lib/telekinesis/consumer.rb +3 -0
  19. data/lib/telekinesis/java_util.rb +46 -0
  20. data/lib/telekinesis/logging/java_logging.rb +18 -0
  21. data/lib/telekinesis/logging/ruby_logger_handler.rb +54 -0
  22. data/lib/telekinesis/producer/async_producer.rb +157 -0
  23. data/lib/telekinesis/producer/async_producer_worker.rb +110 -0
  24. data/lib/telekinesis/producer/noop_failure_handler.rb +12 -0
  25. data/lib/telekinesis/producer/sync_producer.rb +52 -0
  26. data/lib/telekinesis/producer/warn_failure_handler.rb +25 -0
  27. data/lib/telekinesis/producer.rb +4 -0
  28. data/lib/telekinesis/telekinesis-2.0.0.jar +0 -0
  29. data/lib/telekinesis/version.rb +3 -0
  30. data/lib/telekinesis.rb +14 -0
  31. data/telekinesis.gemspec +21 -0
  32. data/test/aws/test_client_adapter.rb +29 -0
  33. data/test/aws/test_java_client_adapter.rb +72 -0
  34. data/test/producer/test_async_producer.rb +158 -0
  35. data/test/producer/test_async_producer_worker.rb +390 -0
  36. data/test/producer/test_helper.rb +1 -0
  37. data/test/producer/test_sync_producer.rb +144 -0
  38. data/test/test_helper.rb +6 -0
  39. metadata +149 -0
@@ -0,0 +1,103 @@
package com.kickstarter.jruby;

import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason;
import com.amazonaws.services.kinesis.model.Record;

import java.util.List;

/**
 * A shim that makes it possible to use the Kinesis Client Library from JRuby.
 * Without the shim, the {@code initialize} method in
 * {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor}
 * conflicts with the special {@code initialize} method in Ruby. The shim
 * interface renames {@code initialize} to {@code init}.
 *
 * <p>For convenience a {@link #newWorker(KinesisClientLibConfiguration, IRecordProcessorFactory)}
 * method is provided, so you can use closure conversion in JRuby to specify an
 * {@link IRecordProcessorFactory}. For example:</p>
 *
 * <pre>
 * some_thing = ...
 *
 * com.kickstarter.jruby.Telekinesis.new_worker(my_config) do
 *   MyRecordProcessor.new(some_thing, some_other_thing)
 * end
 * </pre>
 */
public class Telekinesis {
    /**
     * Create a new KCL {@link Worker} that processes records using the given
     * {@link IRecordProcessorFactory}. Each processor produced by the factory
     * is wrapped in a {@link RecordProcessorShim} before being handed to the KCL.
     */
    public static Worker newWorker(final KinesisClientLibConfiguration config, final IRecordProcessorFactory factory) {
        return new Worker(new com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory() {
            @Override
            public com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor createProcessor() {
                return new RecordProcessorShim(factory.createProcessor());
            }
        }, config);
    }

    // ========================================================================
    /**
     * A shim that wraps a {@link IRecordProcessor} so it can get used by the KCL.
     * Each KCL callback is forwarded verbatim to the wrapped processor, with
     * {@code initialize} renamed to {@code init}.
     */
    private static class RecordProcessorShim implements com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor {
        private final IRecordProcessor underlying;

        public RecordProcessorShim(final IRecordProcessor underlying) { this.underlying = underlying; }

        @Override
        public void initialize(final String shardId) {
            underlying.init(shardId);
        }

        @Override
        public void processRecords(final List<Record> records, final IRecordProcessorCheckpointer checkpointer) {
            underlying.processRecords(records, checkpointer);
        }

        @Override
        public void shutdown(final IRecordProcessorCheckpointer checkpointer, final ShutdownReason reason) {
            underlying.shutdown(checkpointer, reason);
        }
    }

    /**
     * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor}
     * that avoids naming conflicts with reserved words in Ruby.
     */
    public static interface IRecordProcessor {
        /**
         * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#initialize(String)
         */
        void init(final String shardId);

        /**
         * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#processRecords(List, IRecordProcessorCheckpointer)
         */
        void processRecords(List<Record> records, IRecordProcessorCheckpointer checkpointer);

        /**
         * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor#shutdown(IRecordProcessorCheckpointer, ShutdownReason)
         */
        void shutdown(IRecordProcessorCheckpointer checkpointer, ShutdownReason reason);
    }

    /**
     * A parallel {@link com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory}
     * for {@link IRecordProcessor}.
     */
    public static interface IRecordProcessorFactory {
        /**
         * @see com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory#createProcessor()
         */
        IRecordProcessor createProcessor();
    }
}
@@ -0,0 +1,61 @@
module Telekinesis
  module Aws
    # Wraps an underlying exception. Necessary because JRuby is not (yet)
    # compatible with Ruby 2.1's Exception#cause.
    class KinesisError < RuntimeError
      attr_reader :cause

      def initialize(cause)
        @cause = cause
      end
    end

    # Abstract base class for client adapters. Client adapters exist so that
    # switching between underlying AWS client implementations is easy and
    # painless.
    #
    # Subclasses implement +self.build+, +put_record+, and +do_put_records+;
    # this class defines the shared interface and the failure-collection logic
    # for +put_records+.
    class ClientAdapter
      # Build a new client given AWS credentials.
      #
      # Credentials must be supplied as a hash that contains symbolized
      # :access_key_id and :secret_access_key keys.
      def self.build(credentials)
        raise NotImplementedError
      end

      def initialize(client)
        @client = client
      end

      # Make a put_record call to the underlying client. Must return an object
      # that responds to `shard_id` and `sequence_number`.
      def put_record(stream, key, value)
        raise NotImplementedError
      end

      # Make a put_records call to the underlying client. If the request
      # succeeds but some records are rejected, each failed [key, value] pair
      # is returned combined with its [error_code, error_message] pair as a
      # flat 4-tuple.
      def put_records(stream, items)
        responses = do_put_records(stream, items)
        items.zip(responses).each_with_object([]) do |((key, value), response), failed|
          # A nil error_code marks a successful record.
          next if response.error_code.nil?
          failed << [key, value, response.error_code, response.error_message]
        end
      end

      protected

      # Put an enumerable of [key, value] pairs to the given stream. Returns an
      # enumerable of response objects the same size as the given list of items.
      #
      # Response objects must respond to `error_code` and `error_message`. Any
      # response with a nil error_code is considered successful.
      def do_put_records(stream, items)
        raise NotImplementedError
      end
    end
  end
end
@@ -0,0 +1,72 @@
module Telekinesis
  module Aws
    java_import java.nio.ByteBuffer
    java_import com.amazonaws.AmazonClientException
    java_import com.amazonaws.auth.BasicAWSCredentials
    java_import com.amazonaws.auth.DefaultAWSCredentialsProviderChain
    java_import com.amazonaws.internal.StaticCredentialsProvider
    java_import com.amazonaws.services.kinesis.AmazonKinesisClient
    java_import com.amazonaws.services.kinesis.model.PutRecordRequest
    java_import com.amazonaws.services.kinesis.model.PutRecordsRequest
    java_import com.amazonaws.services.kinesis.model.PutRecordsRequestEntry

    # A ClientAdapter that wraps the AWS Java SDK.
    #
    # Since the underlying Java client is thread safe, this adapter is thread
    # safe.
    class JavaClientAdapter < ClientAdapter
      # Build a new client adapter. `credentials` is a hash keyed with
      # `:access_key_id` and `:secret_access_key`. If this hash is left blank
      # (the default) the client uses the DefaultAWSCredentialsProviderChain to
      # look for credentials.
      def self.build(credentials = {})
        client = AmazonKinesisClient.new(build_credentials_provider(credentials))
        new(client)
      end

      # Return an AWSCredentialsProvider for the given credentials hash: a
      # static provider when explicit credentials were supplied, otherwise the
      # SDK's default provider chain.
      #
      # FIX: BasicAWSCredentials and StaticCredentialsProvider were previously
      # referenced without being java_import-ed, so passing explicit
      # credentials raised a NameError. Both imports are now declared above.
      def self.build_credentials_provider(credentials)
        if credentials.empty?
          DefaultAWSCredentialsProviderChain.new
        else
          StaticCredentialsProvider.new(
            BasicAWSCredentials.new(
              credentials[:access_key_id],
              credentials[:secret_access_key]
            )
          )
        end
      end

      # Put a single [key, value] record to the stream. Returns the SDK
      # result, which responds to `shard_id` and `sequence_number`. Wraps any
      # AmazonClientException in a KinesisError.
      def put_record(stream, key, value)
        r = PutRecordRequest.new.tap do |request|
          request.stream_name = stream
          request.partition_key = key.to_s
          request.data = ByteBuffer.wrap(value.to_s.to_java_bytes)
        end
        @client.put_record(r)
      rescue AmazonClientException => e
        raise KinesisError.new(e)
      end

      protected

      # Issue the batched PutRecords call. Returns the per-record result
      # entries, which respond to `error_code` and `error_message`.
      def do_put_records(stream, items)
        result = @client.put_records(build_put_records_request(stream, items))
        result.records
      rescue AmazonClientException => e
        raise KinesisError.new(e)
      end

      # Build a PutRecordsRequest from an enumerable of [key, value] pairs.
      # Keys and values are stringified; values are sent as raw bytes.
      def build_put_records_request(stream, items)
        entries = items.map do |key, value|
          PutRecordsRequestEntry.new.tap do |entry|
            entry.partition_key = key.to_s
            entry.data = ByteBuffer.wrap(value.to_s.to_java_bytes)
          end
        end
        PutRecordsRequest.new.tap do |request|
          request.stream_name = stream
          request.records = entries
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
module Telekinesis
  module Aws
    # A ClientAdapter backed by the ruby aws-sdk gem (version 2).
    #
    # Since the aws-sdk gem does not appear to be thread-safe, this adapter
    # should not be considered thread safe.
    class RubyClientAdapter < ClientAdapter
      # Build a new client adapter. Credentials are passed directly to the
      # constructor for Aws::Kinesis::Client.
      #
      # See: http://docs.aws.amazon.com/sdkforruby/api/Aws/Kinesis/Client.html
      def self.build(credentials)
        new(Aws::Kinesis::Client.new(credentials))
      end

      # Put a single [key, value] record to the stream. Any SDK service error
      # is wrapped in a KinesisError.
      def put_record(stream, key, value)
        @client.put_record(stream: stream, partition_key: key, data: value)
      rescue Aws::Errors::ServiceError => e
        raise KinesisError.new(e)
      end

      protected

      # Issue the batched put_records call and collect the per-record results
      # from every response page.
      def do_put_records(stream, items)
        request = build_put_records_request(stream, items)
        @client.put_records(request).flat_map(&:records)
      rescue Aws::Errors::ServiceError => e
        raise KinesisError.new(e)
      end

      # Build the put_records request hash from [key, value] pairs.
      def build_put_records_request(stream, items)
        records = items.map do |key, value|
          {partition_key: key, data: value}
        end
        {stream: stream, records: records}
      end
    end
  end
end
@@ -0,0 +1,9 @@
# FIX: dropped the stray ".rb" extension from the first require for
# consistency with every other require in the gem.
require "telekinesis/aws/client_adapter"
require "telekinesis/aws/java_client_adapter"

module Telekinesis
  module Aws
    # Kinesis rejects PutRecords requests with more than 500 records.
    KINESIS_MAX_PUT_RECORDS_SIZE = 500
    # Default adapter: the JRuby adapter backed by the AWS Java SDK.
    Client = JavaClientAdapter
  end
end
@@ -0,0 +1,12 @@
module Telekinesis
  module Consumer
    # A RecordProcessor with no-op implementations of all of the required
    # IRecordProcessor methods. Override it to implement simple
    # IRecordProcessors that don't need to do anything special on init or
    # shutdown.
    class BaseProcessor
      # Called once before any records are processed, with the id of the
      # shard this processor is assigned to.
      def init(shard_id); end
      # Called with each batch of records fetched from the shard and a
      # checkpointer that can be used to record progress.
      def process_records(records, checkpointer); end
      # Called when the processor loses its lease or the worker shuts down.
      def shutdown(checkpointer, reason); end
    end
  end
end
@@ -0,0 +1,22 @@
module Telekinesis
  module Consumer
    # A RecordProcessor that uses the given block to process records. Useful to
    # quickly define a consumer.
    #
    #   Telekinesis::Consumer::DistributedConsumer.new(stream: 'my-stream', app: 'tail') do
    #     Telekinesis::Consumer::Block.new do |records, checkpointer|
    #       records.each {|r| puts r}
    #     end
    #   end
    class Block < BaseProcessor
      # Store the block. Raises ArgumentError when no block is supplied.
      def initialize(&block)
        raise ArgumentError, "No block given" unless block
        @block = block
      end

      # Delegate each batch of records (and the checkpointer) to the block.
      def process_records(records, checkpointer)
        @block.call(records, checkpointer)
      end
    end
  end
end
@@ -0,0 +1,114 @@
module Telekinesis
  module Consumer
    java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
    java_import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration

    class DistributedConsumer
      # Create a new consumer that consumes data from a Kinesis stream.
      # DistributedConsumers use DynamoDB to register as part of the same
      # application and evenly distribute work between them. See the
      # AWS Docs for more information:
      #
      # http://docs.aws.amazon.com/kinesis/latest/dev/developing-consumer-apps-with-kcl.html
      #
      # DistributedConsumers are configured with a hash. The Kinesis `:stream`
      # to consume from is required.
      #
      # DistributedConsumers operate in groups. All consumers with the same
      # `:app` id use dynamo to attempt to distribute work evenly among
      # themselves. The `:worker_id` is used to distinguish individual clients
      # (`:worker_id` defaults to the current hostname. If you plan to run more
      # than one DistributedConsumer in the same `:app` per host, make sure you
      # set this to something unique!).
      #
      # Any other valid KCL Worker `:options` may be passed as a hash.
      #
      # For example, to configure a `tail` app on `some-stream` and use the
      # default `:worker_id`, you might pass the following configuration to your
      # DistributedConsumer.
      #
      #   config = {
      #     app: 'tail',
      #     stream: 'some-stream',
      #     options: {initial_position_in_stream: 'TRIM_HORIZON'}
      #   }
      #
      # To actually process the stream, a DistributedConsumer creates
      # record processors. These are objects that correspond to the KCL's
      # RecordProcessor interface - processors must implement `init`,
      # `process_records`, and `shutdown` methods.
      #
      # http://docs.aws.amazon.com/kinesis/latest/dev/kinesis-record-processor-implementation-app-java.html#kinesis-record-processor-implementation-interface-java
      #
      # To specify which record processor to create, pass a block to your
      # distributed consumer that returns a new record processor. This block
      # may (nay, WILL) be called from a background thread so make sure that
      # it's thread-safe.
      #
      # Telekinesis provides a BaseProcessor that implements no-op versions
      # of all of the required methods to make writing quick processors easier
      # and a Block processor that executes the specified block every time
      # `process_records` is called.
      #
      # To write a stream tailer, you might use Block as follows:
      #
      #   Telekinesis::Consumer::DistributedConsumer.new(config) do
      #     Telekinesis::Consumer::Block.new do |records, _|
      #       records.each {|r| puts r}
      #     end
      #   end
      #
      def initialize(config, &block)
        raise ArgumentError, "No block given!" unless block_given?
        kcl_config = self.class.build_config(config)
        # The block is converted by JRuby into the shim's
        # IRecordProcessorFactory; it is invoked once per shard lease.
        @under = com.kickstarter.jruby.Telekinesis.new_worker(kcl_config, &block)
      end

      # Return the underlying KCL worker. It's a java.lang.Runnable.
      def as_runnable
        @under
      end

      # Start the KCL worker. If background is set to `true`, the worker is
      # started in its own JRuby Thread and the Thread is returned. Otherwise,
      # starts in the current thread and returns nil.
      def run(background = false)
        if background
          Thread.new { @under.run }
        else
          @under.run
        end
      end

      protected

      # Translate the config hash into a KinesisClientLibConfiguration.
      # NOTE(review): `protected` has no effect on class methods in Ruby, so
      # this is effectively public — kept as-is to preserve the interface.
      def self.build_config(config)
        creds_hash = config.fetch(:credentials, {})
        credentials_provider = Telekinesis::Aws::JavaClientAdapter.build_credentials_provider(creds_hash)

        # App and Stream are mandatory.
        app, stream = [:app, :stream].map do |k|
          raise ArgumentError, "#{k} is required" unless config.include?(k)
          config[k]
        end
        # Use this host as the worker_id by default.
        worker_id = config.fetch(:worker_id, `hostname`.chomp)

        KinesisClientLibConfiguration.new(app, stream, credentials_provider, worker_id).tap do |kcl_config|
          config.fetch(:options, {}).each do |k, v|
            # Handle initial position in stream separately. It's the only option
            # that requires a value conversion.
            if k.to_s == 'initial_position_in_stream'
              kcl_config.with_initial_position_in_stream(InitialPositionInStream.value_of(v))
            else
              # All other options map onto the KCL config's with_* builders;
              # unknown options are silently ignored.
              setter = "with_#{k}".to_sym
              if kcl_config.respond_to?(setter)
                kcl_config.send(setter, v)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "telekinesis/consumer/distributed_consumer"
2
+ require "telekinesis/consumer/base_processor"
3
+ require "telekinesis/consumer/block"
@@ -0,0 +1,46 @@
module Telekinesis
  module JavaUtil
    java_import java.util.concurrent.locks.ReentrantReadWriteLock

    # Sugar around java.util.concurrent.ReentrantReadWriteLock so that it's
    # easy to use with blocks.
    #
    # e.g.
    #
    #   lock = ReadWriteLock.new
    #   some_value = 12345
    #
    #   # In a reader thread
    #   lock.read_lock do
    #     # Read some data! This won't block any other calls to read_lock, but will
    #     # block if another thread is in a section guarded by write_lock.
    #   end
    #
    #   # In a writer thread
    #   lock.write_lock do
    #     # Write some data! This is exclusive with *any* other code guarded by
    #     # either read_lock or write_lock.
    #   end
    class ReadWriteLock
      # When `fair` is true the underlying lock grants access in
      # approximately arrival order.
      def initialize(fair = false)
        lock = ReentrantReadWriteLock.new(fair)
        @read = lock.read_lock
        @write = lock.write_lock
      end

      # Run the block while holding the read lock.
      #
      # FIX: unlock only after the lock was actually acquired. Previously the
      # `ensure` also ran when lock_interruptibly raised (e.g. the thread was
      # interrupted while waiting), calling unlock on a lock this thread never
      # held and masking the original exception with an
      # IllegalMonitorStateException.
      def read_lock
        @read.lock_interruptibly
        begin
          yield
        ensure
          @read.unlock
        end
      end

      # Run the block while holding the write lock. Same acquisition fix as
      # read_lock.
      def write_lock
        @write.lock_interruptibly
        begin
          yield
        ensure
          @write.unlock
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
require "logger"
require "telekinesis/logging/ruby_logger_handler"

module Telekinesis
  module Logging
    java_import java.util.logging.Logger
    java_import java.util.logging.LogManager

    # Route all java.util.logging output through the given Ruby logger:
    # resets the j.u.l. configuration, then installs a RubyLoggerHandler
    # delegating to `logger` on the root ("") j.u.l. logger.
    def self.capture_java_logging(logger)
      LogManager.log_manager.reset
      Logger.get_logger("").add_handler(RubyLoggerHandler.create(logger))
    end

    # Silence java.util.logging entirely by resetting its configuration
    # (removes all handlers from the root logger).
    def self.disable_java_logging
      LogManager.log_manager.reset
    end
  end
end
@@ -0,0 +1,54 @@
# FIX: this file references Logger::ERROR etc. but relied on its caller having
# already required "logger"; require it explicitly so the file stands alone.
require "logger"

module Telekinesis
  module Logging
    java_import java.util.logging.Level
    java_import java.util.logging.Handler

    # A java logging Handler that delegates to a Ruby logger. The name of the
    # j.u.l. logger is used as the progname argument to Logger.add.
    #
    # The translation between j.u.l. severity levels and Ruby Logger levels
    # isn't exact.
    class RubyLoggerHandler < Handler
      # NOTE: Since this class overrides a Java class, we have to use the Java
      # constructor and set the logger after instantiation. (Overriding in
      # JRuby is weird). Use this method to create a new handler that delegates
      # to the passed logger.
      def self.create(logger)
        new.tap do |s|
          s.set_logger(logger)
        end
      end

      # j.u.l. level -> Ruby Logger severity.
      # NOTE: No j.u.l. level maps to Ruby's FATAL.
      SEVERITY = {
        Level::SEVERE => Logger::ERROR,
        Level::WARNING => Logger::WARN,
        Level::INFO => Logger::INFO,
        Level::CONFIG => Logger::INFO,
        Level::FINE => Logger::DEBUG,
        Level::FINER => Logger::DEBUG,
        Level::FINEST => Logger::DEBUG,
      }

      # Set the delegate Ruby logger (see self.create).
      def set_logger(l)
        @logger = l
      end

      # Close the underlying Ruby logger.
      def close
        @logger.close
      end

      # Ruby's logger has no flush method.
      def flush; end

      # Forward a j.u.l. LogRecord to the Ruby logger, appending any attached
      # exception to the message. An unmapped j.u.l. level yields a nil
      # severity, which Logger#add treats as UNKNOWN.
      def publish(log_record)
        message = if log_record.thrown.nil?
          log_record.message
        else
          "#{log_record.message}: #{log_record.thrown}"
        end
        @logger.add(SEVERITY[log_record.level], message, log_record.logger_name)
      end
    end
  end
end