rails-pipeline 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +227 -0
  4. data/Rakefile +27 -0
  5. data/bin/pipeline +138 -0
  6. data/bin/redis-to-ironmq.rb +20 -0
  7. data/lib/rails-pipeline.rb +34 -0
  8. data/lib/rails-pipeline/emitter.rb +121 -0
  9. data/lib/rails-pipeline/handlers/activerecord_crud.rb +35 -0
  10. data/lib/rails-pipeline/handlers/base_handler.rb +19 -0
  11. data/lib/rails-pipeline/handlers/logger.rb +13 -0
  12. data/lib/rails-pipeline/ironmq_publisher.rb +37 -0
  13. data/lib/rails-pipeline/ironmq_pulling_subscriber.rb +96 -0
  14. data/lib/rails-pipeline/ironmq_subscriber.rb +21 -0
  15. data/lib/rails-pipeline/pipeline_version.rb +40 -0
  16. data/lib/rails-pipeline/protobuf/encrypted_message.pb.rb +37 -0
  17. data/lib/rails-pipeline/protobuf/encrypted_message.proto +18 -0
  18. data/lib/rails-pipeline/redis_forwarder.rb +207 -0
  19. data/lib/rails-pipeline/redis_ironmq_forwarder.rb +12 -0
  20. data/lib/rails-pipeline/redis_publisher.rb +71 -0
  21. data/lib/rails-pipeline/sns_publisher.rb +62 -0
  22. data/lib/rails-pipeline/subscriber.rb +185 -0
  23. data/lib/rails-pipeline/symmetric_encryptor.rb +127 -0
  24. data/lib/rails-pipeline/version.rb +3 -0
  25. data/lib/tasks/rails-pipeline_tasks.rake +4 -0
  26. data/spec/emitter_spec.rb +141 -0
  27. data/spec/handlers/activerecord_crud_spec.rb +100 -0
  28. data/spec/handlers/logger_spec.rb +42 -0
  29. data/spec/ironmp_pulling_subscriber_spec.rb +98 -0
  30. data/spec/ironmq_publisher_spec.rb +37 -0
  31. data/spec/pipeline_version_spec.rb +35 -0
  32. data/spec/redis_forwarder_spec.rb +99 -0
  33. data/spec/redis_publisher_spec.rb +36 -0
  34. data/spec/sns_publisher_spec.rb +28 -0
  35. data/spec/subscriber_spec.rb +278 -0
  36. data/spec/symmetric_encryptor_spec.rb +21 -0
  37. metadata +175 -0
@@ -0,0 +1,207 @@
1
+
2
+ require 'redis'
3
+ require 'active_support/core_ext'
4
+ require 'rails-pipeline/protobuf/encrypted_message.pb'
5
+
6
+ # Pipeline forwarder base class that
7
+ # - reads from redis queue using BRPOPLPUSH for reliable queue pattern
8
+ # - keeps track of failed tasks in the in_progress queue
9
+ # - designed to be used with e.g. IronmqPublisher
10
+
11
+ $redis = ENV["REDISCLOUD_URL"] || ENV["REDISTOGO_URL"] || "localhost:6379"
12
+
13
+ module RailsPipeline
14
+ class RedisForwarder
15
+ if RailsPipeline::HAS_NEWRELIC
16
+ include ::NewRelic::Agent::MethodTracer
17
+ end
18
+
19
# Sets up forwarder state for the redis list named +key+ and installs the
# SIGTERM/SIGINT handlers (see _trap_signals).
#
# key                      - name of the redis list that messages are read from
# blocking_timeout         - timeout handed to BRPOPLPUSH in process_queue
# failure_check_interval   - minimum number of seconds between two
#                            check_for_failures passes triggered by run
# message_processing_limit - TTL (seconds) of the claim key written by report
#
# The keyword arguments default to the values that used to be hard-coded, so
# existing callers that pass only +key+ behave as before.
def initialize(key, blocking_timeout: 2, failure_check_interval: 30, message_processing_limit: 10)
  _trap_signals
  @redis = nil # connection is created lazily by _redis
  @stop = false
  @queue = key
  @in_progress_queue = _in_progress_queue

  @processed = 0
  @blocking_timeout = blocking_timeout
  @failure_check_interval = failure_check_interval
  @message_processing_limit = message_processing_limit # number of seconds before a message is considered failed
  # Back-date the last check by one full interval so the first pass through
  # run finds a failure check due. Time#- already takes seconds, so no
  # ActiveSupport duration helper is needed here.
  @failure_last_checked = Time.now - @failure_check_interval # TODO: randomize start time?
end
32
+
33
# Installs handlers for SIGTERM and SIGINT. Each handler prints a notice and
# then calls stop, which raises the flag the main loop in start looks at.
def _trap_signals
  install_handler = lambda do |signal_name|
    trap(signal_name) do
      puts "Exiting (#{signal_name.delete_prefix('SIG')})".sub(/\(/, '(SIG')
      stop
    end
  end

  install_handler.call('SIGTERM')
  install_handler.call('SIGINT')
end
43
+
44
+
45
+ # Blocking right pop from the queue
46
+ # - use BRPOPLPUSH to temporarily mark the message as "in progress"
47
+ # - delete from the in_progress queue on success
48
+ # - restore to the main queue on failure
49
# Moves one message from @queue to @in_progress_queue, re-publishes it via
# publish, and then deletes it from @in_progress_queue.
#
# Returns early (nil) when the blocking pop times out or when the parsed
# message carries no topic. Any StandardError raised while parsing or
# publishing is logged and the raw message is handed to _put_back_on_queue.
def process_queue
  # pop from the queue and push onto the in_progress queue
  data = _redis.brpoplpush(@queue, @in_progress_queue, timeout: @blocking_timeout)
  return if data.nil? # Timed-out with nothing to process

  # Initialised before the begin block so the rescue clause can tell whether
  # parsing got far enough to produce an envelope.
  encrypted_data = nil
  begin
    encrypted_data = RailsPipeline::EncryptedMessage.parse(data)
    RailsPipeline.logger.debug "Processing #{encrypted_data.uuid}"

    # re-publish to wherever (e.g. IronMQ)
    topic_name = encrypted_data.topic
    if topic_name.nil?
      # NOTE(review): this path returns without touching @in_progress_queue,
      # so the damaged message stays there -- confirm that is intended.
      RailsPipeline.logger.error "Damaged message, no topic name"
      return
    end

    publish(topic_name, data)
    @processed += 1

    # Now remove this message from the in_progress queue
    removed = _redis.lrem(@in_progress_queue, 1, data)
    if removed != 1
      RailsPipeline.logger.warn "OHNO! Didn't remove the data I was expecting to: #{data}"
    end
  rescue StandardError => e
    # StandardError rather than Exception: SystemExit, signals and the like
    # must not be swallowed by the per-message error handling.
    RailsPipeline.logger.info e
    RailsPipeline.logger.info Array(e.backtrace).join("\n")
    # encrypted_data is still nil when the parse itself blew up; calling
    # #uuid on it would raise from inside the rescue and skip the re-queue.
    uuid = encrypted_data.nil? ? "(unparsed)" : encrypted_data.uuid
    RailsPipeline.logger.info "Putting message #{uuid} back on main queue"
    _put_back_on_queue(data)
  end
end
84
+ add_method_tracer :process_queue, "Pipeline/RedisForwarder/process_queue" if RailsPipeline::HAS_NEWRELIC
85
+
86
+ # note in redis that we are processing this message
87
# Writes a claim for the message +uuid+: the key from _report_key(uuid) is set
# to _client_id and expires after @message_processing_limit seconds.
def report(uuid)
  connection = _redis
  claim_key = _report_key(uuid)
  connection.setex(claim_key, @message_processing_limit, _client_id)
end
90
+
91
+ # Search the in-progress queue for messages that are likely to be abandoned
92
+ # and re-queue them on the main queue
93
# Scans @in_progress_queue and hands every message that has no claim key
# (see report / _report_key) to _put_back_on_queue.
#
# The scan is skipped when the in-progress list is empty or when another
# client already holds "<in_progress_queue>__lock". The lock is taken with
# SET NX and a TTL equal to the number of in-progress messages; it is not
# released explicitly here, it simply expires.
def check_for_failures
  num_in_progress = _redis.llen(@in_progress_queue)
  if num_in_progress == 0
    RailsPipeline.logger.debug "No messages in progress, skipping check for failures"
    return
  end

  RailsPipeline.logger.debug "Locking '#{@in_progress_queue}' for #{num_in_progress} seconds"

  # Attempt to lock this queue for the next num_in_progress seconds
  lock_key = "#{@in_progress_queue}__lock"
  locked = _redis.set(lock_key, _client_id, ex: num_in_progress, nx: true)
  unless locked
    RailsPipeline.logger.debug "in progress queue is locked"
    return
  end

  # Go through each message, see if there's a 'report' entry. If not,
  # requeue!
  in_progress = _redis.lrange(@in_progress_queue, 0, num_in_progress)
  in_progress.each do |message|
    enc_message = RailsPipeline::EncryptedMessage.parse(message)
    # Bound to a local because both log lines below need it; the "owned by"
    # line previously referenced an undefined `uuid` and raised NameError.
    uuid = enc_message.uuid
    owner = _redis.get(_report_key(uuid))
    if owner.nil?
      RailsPipeline.logger.info "Putting timed-out message #{uuid} back on main queue"
      _put_back_on_queue(message)
    else
      RailsPipeline.logger.debug "Message #{uuid} is owned by #{owner}"
    end
  end
end
125
+ add_method_tracer :check_for_failures, "Pipeline/RedisForwarder/check_for_failures" if RailsPipeline::HAS_NEWRELIC
126
+
127
+ # Function that runs in the loop
128
# One iteration of the main loop: process at most one message, log the running
# total, and run check_for_failures once more than @failure_check_interval
# seconds have passed since the previous check.
def run
  process_queue
  RailsPipeline.logger.info "Queue: '#{@queue}'. Processed: #{@processed}"

  seconds_since_check = Time.now - @failure_last_checked
  return unless seconds_since_check > @failure_check_interval

  # Stamp the time before checking so a slow or failing check is not retried
  # on the very next iteration.
  @failure_last_checked = Time.now
  check_for_failures
end
136
+
137
+ # Main loop
138
# Main loop: calls run until stop has set @stop, then logs "Finished" and
# shuts the NewRelic agent down when RailsPipeline::HAS_NEWRELIC is set.
#
# A StandardError raised by run is logged and the loop carries on. Anything
# outside StandardError (SystemExit, NoMemoryError, ...) is deliberately NOT
# rescued, so the process can still be terminated from within an iteration.
def start
  until @stop
    begin
      run
    rescue StandardError => e
      RailsPipeline.logger.info e
      RailsPipeline.logger.info Array(e.backtrace).join("\n")
    end
  end

  RailsPipeline.logger.info "Finished"
  if RailsPipeline::HAS_NEWRELIC
    RailsPipeline.logger.info "Shutting down NewRelic"
    ::NewRelic::Agent.shutdown
  end
  nil
end
156
+
157
# Prints a notice and raises the @stop flag; the loop in start exits the next
# time it checks the flag.
def stop
  $stdout.puts "stopping..."
  @stop = true
end
161
+
162
# Returns the memoised redis connection, creating it on first use from the
# global $redis: a "redis://" string is passed as :url, anything else is split
# on ":" into :host and :port.
def _redis
  @redis ||=
    if $redis.start_with?("redis://")
      Redis.new(url: $redis)
    else
      host, port = $redis.split(":")
      Redis.new(host: host, port: port)
    end
  return @redis
end
174
+
175
# Number of messages published so far (incremented in process_queue).
def _processed
  @processed
end
178
+
179
# Name of the companion list holding messages that are currently being
# processed: the main queue name with "_in_progress" appended.
def _in_progress_queue
  format("%s_in_progress", @queue)
end
182
+
183
+ # The redis key at which we 'claim' the message when we start processing it.
184
# Builds the claim key for a message: "<queue>__<uuid>".
def _report_key(uuid)
  format("%s__%s", @queue, uuid)
end
187
+
188
# Identifier stored as the value of lock and claim keys: the name of the
# receiver's class.
def _client_id
  own_class = self.class
  own_class.name
end
191
+
192
+ # Atomically remove a message from the in_progress queue and put it back on
193
+ # the main queue
194
# Atomically (MULTI/EXEC) appends +message+ to @queue and removes one copy of
# it from @in_progress_queue. Logs an error when the removal count is not 1.
def _put_back_on_queue(message)
  future = nil
  _redis.multi do |transaction|
    # Issue the commands on the object yielded by #multi. Calling them on
    # the connection itself inside the block is deprecated in newer
    # redis-rb releases and no longer queues them in the transaction.
    # Fall back to the connection for clients that yield nothing.
    target = transaction || _redis
    target.rpush(@queue, message)
    future = target.lrem(@in_progress_queue, 1, message)
  end
  # The future only resolves once the MULTI block has been executed.
  removed = future.value
  if removed != 1
    RailsPipeline.logger.error "ERROR: Didn't remove message from in_progress queue?!!!"
  end
end
205
+
206
+ end
207
+ end
@@ -0,0 +1,12 @@
1
+
2
+ require 'rails-pipeline/redis_forwarder'
3
+ require 'rails-pipeline/ironmq_publisher'
4
+
5
+ # Mix-in the IronMQ publisher into a RedisForwarder to create a
6
+ # class that will forward redis messages onto IronMQ
7
+
8
module RailsPipeline
  # A RedisForwarder with IronmqPublisher mixed in. RedisForwarder calls
  # publish(topic_name, data) for every message it pops; the mixin is
  # presumably what supplies that method (IronmqPublisher lives in
  # rails-pipeline/ironmq_publisher -- confirm there).
  class RedisIronmqForwarder < RedisForwarder
    include IronmqPublisher
  end
end
@@ -0,0 +1,71 @@
1
+
2
+ require 'redis'
3
+
4
+ # Backend for data pipeline that publishes to redis queue
5
+ # (typically for consumption by a log sender)
6
+ #
7
+ # Typically initialized in a rails initializer, e.g.
8
+ # RailsPipeline::RedisPublisher.redis = Redis.new(url: ENV["REDIS_URL"])
9
+ # RailsPipeline::RedisPublisher.namespace = "my-app-pipeline"
10
+
11
module RailsPipeline
  # Mixin whose publish method LPUSHes the message onto a single redis list.
  # The connection and the list name ("namespace") are module-wide settings
  # shared by every class that includes this module.
  module RedisPublisher
    class << self
      # Allow configuration via initializer
      @@redis = nil
      @@namespace = "pipeline" # default redis queue name

      # Returns the configured connection, building one on first use.
      #
      # The location comes from the global $redis (assigned by
      # rails-pipeline/redis_forwarder). When that file has not been loaded
      # $redis is nil, so fall back to the same environment variables and
      # default it uses instead of failing on nil.
      def _redis
        if @@redis.nil?
          location = $redis || ENV["REDISCLOUD_URL"] || ENV["REDISTOGO_URL"] || "localhost:6379"
          if location.start_with?("redis://")
            @@redis = Redis.new(url: location)
          else
            host, port = location.split(":")
            @@redis = Redis.new(host: host, port: port)
          end
        end
        @@redis
      end

      # Injects a ready-made connection (anything responding to lpush).
      def redis=(redis)
        @@redis = redis
      end

      # Sets the name of the redis list that publish pushes onto.
      def namespace=(namespace)
        @@namespace = namespace
      end

      # Name of the redis list that publish pushes onto.
      def namespace
        @@namespace
      end
    end

    def self.included(base)
      base.extend ClassMethods
      base.send :include, InstanceMethods
    end

    module InstanceMethods
      # Pushes +data+ onto the shared list. +topic_name+ is only used in the
      # log line; every topic goes to the same list (see _key).
      def publish(topic_name, data)
        t0 = Time.now
        _redis.lpush(_key, data)
        t1 = Time.now
        ::NewRelic::Agent.record_metric('Pipeline/Redis/publish', t1-t0) if RailsPipeline::HAS_NEWRELIC
        RailsPipeline.logger.debug "Publishing to redis '#{topic_name}' took #{t1-t0}s"
      end

      # Shared module-level connection.
      def _redis
        RedisPublisher._redis
      end

      # Redis list name, taken from the module-level namespace.
      def _key
        RedisPublisher.namespace
      end
    end

    module ClassMethods
    end
  end
end
@@ -0,0 +1,62 @@
1
+
2
+
3
+ require 'aws-sdk'
4
+
5
+ # Backend for data pipeline that publishes to Amazon Simple Notification
6
+ # Service (SNS).
7
+ #
8
+ # Configure via an initializer like:
9
+ # RailsPipeline::SnsPublisher.account_id = "6982739827398"
10
+
11
# Mixin that publishes pipeline messages to an SNS topic. The AWS account id
# used to build topic ARNs is a module-wide setting that the AWS_ACCOUNT_ID
# environment variable overrides.
module RailsPipeline::SnsPublisher
  class << self
    # Allow configuration via initializer
    @@account_id = nil

    # Account id configured through account_id= (nil until set).
    def account_id
      @@account_id
    end

    def account_id=(account_id)
      @@account_id = account_id
    end
  end

  def self.included(base)
    base.send(:include, InstanceMethods)
    base.extend(ClassMethods)
  end

  module InstanceMethods
    # Publishes +data+ to the topic named +topic_name+ and logs how long the
    # call took.
    def publish(topic_name, data)
      started_at = Time.now
      arn = _topic_arn(topic_name)
      _sns.topics[arn].publish(data, subject: _subject, sqs: data)
      finished_at = Time.now
      RailsPipeline.logger.debug "Published to SNS '#{topic_name}' in #{finished_at-started_at}s"
    end

    # Memoised AWS::SNS client, one per including instance.
    def _sns
      @sns = AWS::SNS.new if @sns.nil?
      @sns
    end

    # ARN of the topic in +region+ under the configured account.
    def _topic_arn(topic_name, region="us-east-1")
      ["arn", "aws", "sns", "#{region}", "#{_account_id}", "#{topic_name}"].join(":")
    end

    # Subject of SNS message is ClassName-id
    def _subject
      "#{self.class.name}-#{self.id}"
    end

    # AWS_ACCOUNT_ID from the environment when the variable exists, otherwise
    # the module-level account_id.
    def _account_id
      ENV.fetch("AWS_ACCOUNT_ID") { RailsPipeline::SnsPublisher.account_id }
    end
  end

  module ClassMethods
  end

end
@@ -0,0 +1,185 @@
1
+ require "rails-pipeline/symmetric_encryptor"
2
+
3
+ module RailsPipeline
4
+
5
+ module Subscriber
6
+
7
+ Error = Class.new(StandardError)
8
+ NoApiKeyError = Class.new(Error)
9
+ WrongApiKeyError = Class.new(Error)
10
+
11
class << self

  # Module-wide registries, both keyed by whatever is passed as
  # payload_class to register.
  @@registered_models = {}
  @@registered_handlers = {}

  # Associates +payload_class+ with a +target_class+ and, optionally, a
  # +handler+. Registering the same payload class again overwrites both
  # entries (the handler entry becomes nil when no handler is given).
  def register(payload_class, target_class, handler = nil)
    @@registered_models.store(payload_class, target_class)
    @@registered_handlers.store(payload_class, handler)
  end

  # Target registered for +payload_class+, or nil.
  def target_class(payload_class)
    @@registered_models[payload_class]
  end

  # Handler registered for +payload_class+, or nil.
  def target_handler(payload_class)
    @@registered_handlers[payload_class]
  end

  # The complete payload class => handler registry (the live hash, not a
  # copy).
  def registered_handlers
    @@registered_handlers
  end
end
33
+
34
+
35
# Hook run when the module is mixed into +base+: triggers SymmetricEncryptor's
# own included hook for +base+, adds InstanceMethods / ClassMethods, and, when
# RailsPipeline::HAS_NEWRELIC is set, wraps handle_envelope and handle_payload
# in NewRelic transaction tracers.
def self.included(base)
  RailsPipeline::SymmetricEncryptor.included(base)
  base.send(:include, InstanceMethods)
  base.extend(ClassMethods)
  return unless RailsPipeline::HAS_NEWRELIC

  instrumentation = ::NewRelic::Agent::Instrumentation::ControllerInstrumentation
  base.send(:include, instrumentation)
  base.extend(instrumentation::ClassMethods)
  base.add_transaction_tracer(:handle_envelope, category: :task)
  base.add_transaction_tracer(:handle_payload, category: :task)
end
46
+
47
+ module InstanceMethods
48
+
49
+ # Take an EncryptedMessage envelope, and decrypt the cipher text, then
50
+ # get the protobuf object out of it
51
# Entry point for an incoming EncryptedMessage envelope: checks the API key,
# decrypts the envelope, parses the plaintext with a registered payload class
# of a compatible version and passes the result to handle_payload.
#
# Returns nil without doing anything when DISABLE_RAILS_PIPELINE or
# DISABLE_RAILS_PIPELINE_PROCESSING exists in the environment, and nil after
# logging when no compatible payload class is registered. Errors raised by
# verify_api_key propagate to the caller.
def handle_envelope(envelope)
  kill_switches = %w[DISABLE_RAILS_PIPELINE DISABLE_RAILS_PIPELINE_PROCESSING]
  if kill_switches.any? { |name| ENV.has_key?(name) }
    RailsPipeline.logger.debug "Skipping incoming pipeline messages (disabled by env vars)"
    return
  end

  verify_api_key(envelope)
  payload_str = self.class.decrypt(envelope)

  # Pick the registered message class (exact or same-major) that will parse
  # and process this message.
  clazz = registered_class_on_same_major_version(envelope.type_info)
  if clazz.nil?
    # No compatible version of this message is registered for this app.
    RailsPipeline.logger.info "Dropping unclaimed message #{envelope.type_info} (no compatible version registered)."
    return
  end

  handle_payload(clazz.parse(payload_str), envelope)
end
73
+
74
+ # Take a protobuf object (payload) and forward it to the appropriate
75
+ # handler/method/proc
76
# Dispatches a parsed protobuf +payload+ to whatever was registered for its
# class:
#
# * target is a Class and a handler class is registered: the handler is
#   instantiated and its handle_payload is called; this method returns nil.
# * target is a Class with a suitable from_pipeline_* method: that method is
#   called with (payload, event_type) and its result is returned.
# * target is a Class with neither: the drop is logged, nil is returned.
# * target is a Proc: it is called with the payload and its result returned.
# * anything else: nil.
def handle_payload(payload, envelope)
  # All lookups happen up front, before the kind of target is inspected.
  version = _version(payload)
  clazz = target_class(payload)
  handler_class = target_handler(payload)
  event_type = envelope.event_type
  handler_method = most_suitable_handler_method_name(version, clazz)

  case clazz
  when Class
    if handler_class
      # If a built in handler_class is registered, then just use it
      handler_class.new(payload, target_class: clazz, envelope: envelope).handle_payload
      nil
    elsif handler_method
      # Target class had a from_pipeline method, so just call it and move on
      clazz.send(handler_method, payload, event_type)
    else
      RailsPipeline.logger.info "No handler set, dropping message #{payload.class.name}"
      nil
    end
  when Proc
    clazz.call(payload)
  end
end
98
+
99
# Resolves the class used to parse a message whose type is
# +payload_class_name+ (e.g. "Foo_1_3").
#
# Returns the class with exactly that name when it is registered; otherwise
# the first registered class that shares the "<Name>_<major>" part; nil when
# neither exists.
def registered_class_on_same_major_version(payload_class_name)
  wanted_name = payload_class_name.to_s
  # Registry keys may be classes while the envelope carries a String, so
  # compare by name.
  registered_names = Subscriber.registered_handlers.keys.map(&:to_s)

  # The version we've been given has been registered. Return that.
  return Object.const_get(wanted_name) if registered_names.include?(wanted_name)

  # Lops off the minor version from the payload class name so
  # we can find the registered message type with the same major version.
  major_prefix = wanted_name.split("_", 3)[0, 2].join("_")

  # Require the prefix to end at an underscore (or be the whole name):
  # a bare start_with?("Foo_1") would also accept "Foo_10_0".
  compatible_name = registered_names.find do |class_name|
    class_name == major_prefix || class_name.start_with?("#{major_prefix}_")
  end

  compatible_name.nil? ? nil : Object.const_get(compatible_name)
end
126
+
127
# Returns the name of the from_pipeline_<major>_<minor> method on
# +receiver_class+ with the highest minor that does not exceed
# +version.minor+, or nil when there is none. Results are memoised in
# self.class.handler_method_cache.
#
# version        - object responding to #major and #minor (minor is compared
#                  numerically)
# receiver_class - object whose #methods are searched
#
# NOTE(review): the cache is keyed by +version+ alone, so two receiver
# classes seen with equal versions share one entry -- confirm that is fine.
def most_suitable_handler_method_name(version, receiver_class)
  cached_method = self.class.handler_method_cache[version]
  return cached_method if cached_method

  # Anchored on both ends so major 1 does not also match from_pipeline_10_*.
  # A method without a minor part (from_pipeline_1) counts as minor 0.
  pattern = /\Afrom_pipeline_#{Regexp.escape(version.major.to_s)}(?:_(\d+))?\z/

  candidates = receiver_class.methods
                             .map { |method_name| [method_name, pattern.match(method_name.to_s)] }
                             .select { |_, match| match && match[1].to_i <= version.minor }

  # Compare minors as integers; sorting the names as text would rank
  # from_pipeline_1_9 above from_pipeline_1_10.
  best = candidates.max_by { |_, match| match[1].to_i }
  method_name = best.nil? ? nil : best.first

  # cache handler method for this version
  self.class.handler_method_cache[version] = method_name
  method_name
end
142
+
143
# Checks the envelope's API key against the keys from _api_keys.
#
# Returns true when the key is present and listed.
# Raises NoApiKeyError when the envelope carries no key, and
# WrongApiKeyError when the key is not in the list.
def verify_api_key(envelope)
  raise NoApiKeyError.new unless envelope.api_key.present?
  raise WrongApiKeyError.new unless _api_keys.include?(envelope.api_key)

  true
end
154
+
155
# Looks up what was registered as the target for this payload's class.
def target_class(payload)
  payload_class = payload.class
  RailsPipeline::Subscriber.target_class(payload_class)
end
158
+
159
# Looks up the handler registered for this payload's class (nil when none).
def target_handler(payload)
  payload_class = payload.class
  RailsPipeline::Subscriber.target_handler(payload_class)
end
162
+
163
# Builds a PipelineVersion from the payload's class name: everything after the
# first underscore ("Foo_1_2" -> "1_2") is passed to PipelineVersion.new.
def _version(payload)
  version_part = payload.class.name.split('_', 2)[1]
  RailsPipeline::PipelineVersion.new(version_part)
end
167
+
168
# Accepted API keys: the comma-separated PIPELINE_API_KEYS environment
# variable as an array (empty when the variable is not set).
def _api_keys
  raw_keys = ENV.fetch('PIPELINE_API_KEYS', "")
  raw_keys.split(',')
end
171
+
172
+ end
173
+
174
module ClassMethods
  # Replaces the whole handler-method cache (e.g. to reset it).
  attr_writer :handler_method_cache

  # Per-class cache used by most_suitable_handler_method_name, created empty
  # on first access.
  def handler_method_cache
    @handler_method_cache ||= {}
  end
end
184
+ end
185
+ end