rails-pipeline 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +227 -0
  4. data/Rakefile +27 -0
  5. data/bin/pipeline +138 -0
  6. data/bin/redis-to-ironmq.rb +20 -0
  7. data/lib/rails-pipeline.rb +34 -0
  8. data/lib/rails-pipeline/emitter.rb +121 -0
  9. data/lib/rails-pipeline/handlers/activerecord_crud.rb +35 -0
  10. data/lib/rails-pipeline/handlers/base_handler.rb +19 -0
  11. data/lib/rails-pipeline/handlers/logger.rb +13 -0
  12. data/lib/rails-pipeline/ironmq_publisher.rb +37 -0
  13. data/lib/rails-pipeline/ironmq_pulling_subscriber.rb +96 -0
  14. data/lib/rails-pipeline/ironmq_subscriber.rb +21 -0
  15. data/lib/rails-pipeline/pipeline_version.rb +40 -0
  16. data/lib/rails-pipeline/protobuf/encrypted_message.pb.rb +37 -0
  17. data/lib/rails-pipeline/protobuf/encrypted_message.proto +18 -0
  18. data/lib/rails-pipeline/redis_forwarder.rb +207 -0
  19. data/lib/rails-pipeline/redis_ironmq_forwarder.rb +12 -0
  20. data/lib/rails-pipeline/redis_publisher.rb +71 -0
  21. data/lib/rails-pipeline/sns_publisher.rb +62 -0
  22. data/lib/rails-pipeline/subscriber.rb +185 -0
  23. data/lib/rails-pipeline/symmetric_encryptor.rb +127 -0
  24. data/lib/rails-pipeline/version.rb +3 -0
  25. data/lib/tasks/rails-pipeline_tasks.rake +4 -0
  26. data/spec/emitter_spec.rb +141 -0
  27. data/spec/handlers/activerecord_crud_spec.rb +100 -0
  28. data/spec/handlers/logger_spec.rb +42 -0
  29. data/spec/ironmp_pulling_subscriber_spec.rb +98 -0
  30. data/spec/ironmq_publisher_spec.rb +37 -0
  31. data/spec/pipeline_version_spec.rb +35 -0
  32. data/spec/redis_forwarder_spec.rb +99 -0
  33. data/spec/redis_publisher_spec.rb +36 -0
  34. data/spec/sns_publisher_spec.rb +28 -0
  35. data/spec/subscriber_spec.rb +278 -0
  36. data/spec/symmetric_encryptor_spec.rb +21 -0
  37. metadata +175 -0
@@ -0,0 +1,207 @@
1
+
2
+ require 'redis'
3
+ require 'active_support/core_ext'
4
+ require 'rails-pipeline/protobuf/encrypted_message.pb'
5
+
6
+ # Pipeline forwarder base class that
7
+ # - reads from redis queue using BRPOPLPUSH for reliable queue pattern
8
+ # - keeps track of failed tasks in the in_progress queue
9
+ # - designed to be used with e.g. IronmqPublisher
10
+
11
+ $redis = ENV["REDISCLOUD_URL"] || ENV["REDISTOGO_URL"] || "localhost:6379"
12
+
13
+ module RailsPipeline
14
+ class RedisForwarder
15
# Mix in NewRelic's method tracer only when RailsPipeline reports the agent as available.
include ::NewRelic::Agent::MethodTracer if RailsPipeline::HAS_NEWRELIC
18
+
19
# Build a forwarder that drains the redis list named by +key+.
#
# key - name of the redis list that messages are read from.
def initialize(key)
  _trap_signals

  # Queue names; the in-progress queue name is derived from @queue.
  @queue = key
  @in_progress_queue = _in_progress_queue

  # Runtime state.
  @redis = nil
  @stop = false
  @processed = 0

  # Timing settings.
  @blocking_timeout = 2
  @failure_check_interval = 30
  @message_processing_limit = 10 # number of seconds before a message is considered failed

  # Back-date the last failure check by one full interval.
  @failure_last_checked = Time.now - @failure_check_interval.seconds # TODO: randomize start time?
end
32
+
33
# Install handlers for SIGTERM and SIGINT that announce the signal on
# stdout and then call #stop.
def _trap_signals
  %w[SIGTERM SIGINT].each do |signal_name|
    trap(signal_name) do
      puts "Exiting (#{signal_name})"
      stop
    end
  end
end
43
+
44
+
45
# Blocking right pop from the queue
# - use BRPOPLPUSH to temporarily mark the message as "in progress"
# - delete from the in_progress queue on success
# - restore to the main queue on failure
#
# Returns nil when the pop times out with nothing to process.
def process_queue
  # pop from the queue and push onto the in_progress queue
  data = _redis.brpoplpush(@queue, @in_progress_queue, timeout: @blocking_timeout)
  return if data.nil? # Timed-out with nothing to process

  # Declared before the begin block so the rescue clause can tell whether
  # parsing got far enough to give us a uuid.
  encrypted_data = nil
  begin
    encrypted_data = RailsPipeline::EncryptedMessage.parse(data)
    RailsPipeline.logger.debug "Processing #{encrypted_data.uuid}"

    # re-publish to wherever (e.g. IronMQ)
    topic_name = encrypted_data.topic
    if topic_name.nil?
      # NOTE(review): this early return issues no LREM, so the damaged
      # message stays on the in_progress queue.
      RailsPipeline.logger.error "Damaged message, no topic name"
      return
    end

    publish(topic_name, data)
    @processed += 1

    # Now remove this message from the in_progress queue
    removed = _redis.lrem(@in_progress_queue, 1, data)
    if removed != 1
      RailsPipeline.logger.warn "OHNO! Didn't remove the data I was expecting to: #{data}"
    end
  rescue StandardError => e
    # StandardError only: process-level exceptions (SystemExit, signals,
    # NoMemoryError) must not be swallowed here.
    RailsPipeline.logger.info e
    RailsPipeline.logger.info e.backtrace.join("\n")
    # encrypted_data is still nil when the parse itself failed; do not
    # dereference it, otherwise the message would never be requeued.
    uuid = encrypted_data.nil? ? "(unparsed)" : encrypted_data.uuid
    RailsPipeline.logger.info "Putting message #{uuid} back on main queue"
    _put_back_on_queue(data)
  end
end
84
+ add_method_tracer :process_queue, "Pipeline/RedisForwarder/process_queue" if RailsPipeline::HAS_NEWRELIC
85
+
86
# Record in redis that this client is processing the message +uuid+.
# The key is written with SETEX, expiring after @message_processing_limit.
def report(uuid)
  connection = _redis
  claim_key = _report_key(uuid)
  connection.setex(claim_key, @message_processing_limit, _client_id)
end
90
+
91
# Search the in-progress queue for messages that are likely to be abandoned
# and re-queue them on the main queue.
#
# A message counts as abandoned when no 'report' key (see #_report_key)
# exists for its uuid. The scan is guarded by a redis lock so that only one
# client at a time walks the in_progress queue.
def check_for_failures
  # Lock in_progress queue or return
  num_in_progress = _redis.llen(@in_progress_queue)
  if num_in_progress == 0
    RailsPipeline.logger.debug "No messages in progress, skipping check for failures"
    return
  end

  RailsPipeline.logger.debug "Locking '#{@in_progress_queue}' for #{num_in_progress} seconds"

  # Attempt to lock this queue for the next num_in_progress seconds
  lock_key = "#{@in_progress_queue}__lock"
  locked = _redis.set(lock_key, _client_id, ex: num_in_progress, nx: true)
  unless locked
    RailsPipeline.logger.debug "in progress queue is locked"
    return
  end

  # Go through each message, see if there's a 'report' entry. If not,
  # requeue! LRANGE's stop index is inclusive, hence the "- 1".
  in_progress = _redis.lrange(@in_progress_queue, 0, num_in_progress - 1)
  in_progress.each do |message|
    enc_message = RailsPipeline::EncryptedMessage.parse(message)
    owner = _redis.get(_report_key(enc_message.uuid))
    if owner.nil?
      RailsPipeline.logger.info "Putting timed-out message #{enc_message.uuid} back on main queue"
      _put_back_on_queue(message)
    else
      RailsPipeline.logger.debug "Message #{enc_message.uuid} is owned by #{owner}"
    end
  end
end
125
+ add_method_tracer :check_for_failures, "Pipeline/RedisForwarder/check_for_failures" if RailsPipeline::HAS_NEWRELIC
126
+
127
# One iteration of the main loop: process at most one message, log the
# running total, and run the failure check once the check interval has
# elapsed since the previous one.
def run
  process_queue
  RailsPipeline.logger.info "Queue: '#{@queue}'. Processed: #{@processed}"

  seconds_since_check = Time.now - @failure_last_checked
  return unless seconds_since_check > @failure_check_interval

  @failure_last_checked = Time.now
  check_for_failures
end
136
+
137
# Main loop: call #run repeatedly until #stop has been requested, then log
# completion and shut down NewRelic when it is available.
#
# StandardError raised by an iteration is logged and the loop continues.
# Process-level exceptions (SystemExit, NoMemoryError, ...) are deliberately
# not rescued so they can terminate the process.
def start
  until @stop
    begin
      run
    rescue StandardError => e
      RailsPipeline.logger.info e
      RailsPipeline.logger.info e.backtrace.join("\n")
    end
  end

  # Shutdown runs outside the rescued loop body, so a failure here surfaces
  # instead of sending us around the loop again.
  RailsPipeline.logger.info "Finished"
  if RailsPipeline::HAS_NEWRELIC
    RailsPipeline.logger.info "Shutting down NewRelic"
    ::NewRelic::Agent.shutdown
  end
end
156
+
157
# Ask the main loop to finish: prints a notice on stdout and raises the
# @stop flag that #start checks.
def stop
  $stdout.puts "stopping..."
  @stop = true
end
161
+
162
# Lazily build and memoize the redis client described by the global $redis.
# A value starting with "redis://" is passed as a URL; anything else is
# split on ":" into host and port.
def _redis
  return @redis unless @redis.nil?

  @redis =
    if $redis.start_with?("redis://")
      Redis.new(url: $redis)
    else
      hostname, port_number = $redis.split(":")
      Redis.new(host: hostname, port: port_number)
    end
end
174
+
175
# Current value of the @processed counter.
def _processed
  @processed
end
178
+
179
# Name of the in-progress list: the main queue name plus "_in_progress".
def _in_progress_queue
  @queue.to_s + "_in_progress"
end
182
+
183
# The redis key at which we 'claim' the message when we start processing it:
# the queue name and the message uuid joined by a double underscore.
def _report_key(uuid)
  "#{@queue}__" + uuid.to_s
end
187
+
188
# Identifier written to redis as the lock/claim value: the name of this
# object's class.
def _client_id
  owner_class = self.class
  owner_class.name
end
191
+
192
# Atomically remove a message from the in_progress queue and put it back on
# the main queue.
#
# Both commands are issued on the object yielded by MULTI. Calling them on
# the client itself inside the block is deprecated in newer redis-rb
# releases, while the yielded object works on old and new versions alike.
def _put_back_on_queue(message)
  removal = nil
  _redis.multi do |transaction|
    transaction.rpush(@queue, message)
    removal = transaction.lrem(@in_progress_queue, 1, message)
  end

  # The future is only resolved once the MULTI block has been executed.
  removed = removal.value
  if removed != 1
    RailsPipeline.logger.error "ERROR: Didn't remove message from in_progress queue?!!!"
  end
end
205
+
206
+ end
207
+ end
@@ -0,0 +1,12 @@
1
+
2
+ require 'rails-pipeline/redis_forwarder'
3
+ require 'rails-pipeline/ironmq_publisher'
4
+
5
# Forwarder that reads messages from redis and hands them to IronMQ.
#
# It is a RedisForwarder with the IronmqPublisher module mixed in;
# presumably that module supplies the publish(topic_name, data) method the
# forwarder calls (verify against ironmq_publisher.rb).

module RailsPipeline
  class RedisIronmqForwarder < RedisForwarder
    include IronmqPublisher
  end
end
@@ -0,0 +1,71 @@
1
+
2
+ require 'redis'
3
+
4
# Backend for data pipeline that publishes to redis queue
# (typically for consumption by a log sender)
#
# Typically initialized in a rails initializer e.g.
#   RailsPipeline::RedisPublisher.redis = Redis.new(url: ENV["REDIS_URL"])
#   RailsPipeline::RedisPublisher.namespace = "my-app-pipeline"

module RailsPipeline
  module RedisPublisher
    # Allow configuration via initializer
    @@redis = nil
    @@namespace = "pipeline" # default redis queue name

    class << self
      # The shared redis client. When none was injected through .redis= one
      # is built lazily from the global $redis, falling back to the
      # REDISCLOUD_URL / REDISTOGO_URL environment variables and finally to
      # "localhost:6379" when $redis has not been set.
      def _redis
        if @@redis.nil?
          location = $redis || ENV["REDISCLOUD_URL"] || ENV["REDISTOGO_URL"] || "localhost:6379"
          if location.start_with?("redis://")
            @@redis = Redis.new(url: location)
          else
            host, port = location.split(":")
            @@redis = Redis.new(host: host, port: port)
          end
        end
        @@redis
      end

      # Inject a pre-built redis client.
      def redis=(redis)
        @@redis = redis
      end

      # Set the name of the redis list that messages are pushed onto.
      def namespace=(namespace)
        @@namespace = namespace
      end

      # Name of the redis list that messages are pushed onto.
      def namespace
        @@namespace
      end
    end

    def self.included(base)
      base.extend ClassMethods
      base.send :include, InstanceMethods
    end

    module InstanceMethods
      # Push +data+ onto the redis list named by RedisPublisher.namespace.
      # +topic_name+ is only used in the debug log line; it does not select
      # the list.
      def publish(topic_name, data)
        t0 = Time.now
        _redis.lpush(_key, data)
        t1 = Time.now
        ::NewRelic::Agent.record_metric('Pipeline/Redis/publish', t1 - t0) if RailsPipeline::HAS_NEWRELIC
        RailsPipeline.logger.debug "Publishing to redis '#{topic_name}' took #{t1 - t0}s"
      end

      # The module-wide redis client.
      def _redis
        RedisPublisher._redis
      end

      # The redis list name messages are pushed to.
      def _key
        RedisPublisher.namespace
      end
    end

    module ClassMethods
    end
  end
end
@@ -0,0 +1,62 @@
1
+
2
+
3
+ require 'aws-sdk'
4
+
5
# Backend for data pipeline that publishes to Amazon Simple Notification
# Service (SNS).
#
# Configure via an initializer like:
#   RailsPipeline::SnsPublisher.account_id = "6982739827398"
# or set the AWS_ACCOUNT_ID environment variable, which takes precedence.

module RailsPipeline
  module SnsPublisher
    # Allow configuration via initializer
    @@account_id = nil

    class << self
      def account_id
        @@account_id
      end

      def account_id=(account_id)
        @@account_id = account_id
      end
    end

    def self.included(base)
      base.send :include, InstanceMethods
      base.extend ClassMethods
    end

    module InstanceMethods
      # Publish +data+ to the SNS topic named +topic_name+. The same payload
      # is passed as the message and as the :sqs option.
      def publish(topic_name, data)
        t0 = Time.now
        topic = _sns.topics[_topic_arn(topic_name)]
        topic.publish(data, subject: _subject, sqs: data)
        t1 = Time.now
        RailsPipeline.logger.debug "Published to SNS '#{topic_name}' in #{t1 - t0}s"
      end

      # Memoized AWS::SNS client for this object.
      def _sns
        @sns = AWS::SNS.new if @sns.nil?
        @sns
      end

      # Build the topic ARN from region, account id and topic name.
      def _topic_arn(topic_name, region = "us-east-1")
        "arn:aws:sns:#{region}:#{_account_id}:#{topic_name}"
      end

      # Subject of SNS message is ClassName-id; the including object must
      # therefore respond to #id.
      def _subject
        "#{self.class.name}-#{self.id}"
      end

      # AWS account id: the AWS_ACCOUNT_ID environment variable when set,
      # otherwise the value configured on the module.
      def _account_id
        return ENV["AWS_ACCOUNT_ID"] if ENV.has_key?("AWS_ACCOUNT_ID")

        RailsPipeline::SnsPublisher.account_id
      end
    end

    module ClassMethods
    end
  end
end
@@ -0,0 +1,185 @@
1
+ require "rails-pipeline/symmetric_encryptor"
2
+
3
+ module RailsPipeline
4
+
5
+ module Subscriber
6
+
7
+ Error = Class.new(StandardError)
8
+ NoApiKeyError = Class.new(Error)
9
+ WrongApiKeyError = Class.new(Error)
10
+
11
# Module-level registry mapping payload classes to their target class (or
# proc) and to an optional handler class.
class << self

  @@registered_models = {}
  @@registered_handlers = {}

  # Register +target_class+ and, optionally, +handler+ for +payload_class+.
  def register(payload_class, target_class, handler = nil)
    @@registered_models.store(payload_class, target_class)
    @@registered_handlers.store(payload_class, handler)
  end

  # Target registered for +payload_class+, or nil.
  def target_class(payload_class)
    @@registered_models.fetch(payload_class, nil)
  end

  # Handler registered for +payload_class+, or nil.
  def target_handler(payload_class)
    @@registered_handlers.fetch(payload_class, nil)
  end

  # The whole payload-class => handler table.
  def registered_handlers
    @@registered_handlers
  end
end
33
+
34
+
35
# Hook run when Subscriber is included into +base+: forwards to the
# SymmetricEncryptor hook, mixes in the instance and class methods, and,
# when NewRelic is available, instruments handle_envelope and
# handle_payload as task transactions.
def self.included(base)
  RailsPipeline::SymmetricEncryptor.included(base)
  base.send(:include, InstanceMethods)
  base.extend(ClassMethods)
  return unless RailsPipeline::HAS_NEWRELIC

  instrumentation = ::NewRelic::Agent::Instrumentation::ControllerInstrumentation
  base.send(:include, instrumentation)
  base.extend(instrumentation::ClassMethods)
  base.add_transaction_tracer(:handle_envelope, category: :task)
  base.add_transaction_tracer(:handle_payload, category: :task)
end
46
+
47
+ module InstanceMethods
48
+
49
# Take an EncryptedMessage envelope, verify its api key, decrypt the cipher
# text, then parse the protobuf payload and pass it to #handle_payload.
#
# Returns nil without processing when either DISABLE_RAILS_PIPELINE or
# DISABLE_RAILS_PIPELINE_PROCESSING is present in the environment, or when
# no compatible message class is registered.
def handle_envelope(envelope)
  disabled = %w[DISABLE_RAILS_PIPELINE DISABLE_RAILS_PIPELINE_PROCESSING].any? do |flag|
    ENV.has_key?(flag)
  end
  if disabled
    RailsPipeline.logger.debug "Skipping incoming pipeline messages (disabled by env vars)"
    return
  end

  verify_api_key(envelope)
  payload_str = self.class.decrypt(envelope)

  # Find the registered minor version & its related handler to parse and
  # process this message.
  message_class = registered_class_on_same_major_version(envelope.type_info)
  if message_class.nil?
    # No compatible version of this message is registered for this app.
    RailsPipeline.logger.info "Dropping unclaimed message #{envelope.type_info} (no compatible version registered)."
    return
  end

  # Parse and handle the payload.
  handle_payload(message_class.parse(payload_str), envelope)
end
73
+
74
# Take a protobuf object (payload) and forward it to the appropriate
# handler/method/proc.
#
# Returns the result of the target's from_pipeline method or of the target
# proc. Returns nil when a handler class processed the payload, when no
# handler is available, or when the target is neither a Class nor a Proc.
def handle_payload(payload, envelope)
  version = _version(payload)
  target = target_class(payload)
  handler_class = target_handler(payload)
  event_type = envelope.event_type
  handler_method = most_suitable_handler_method_name(version, target)

  case target
  when Class
    if handler_class
      # If a built in handler_class is registered, then just use it
      handler_class.new(payload, target_class: target, envelope: envelope).handle_payload
      nil
    elsif handler_method
      # Target class had a from_pipeline method, so just call it and move on
      target.send(handler_method, payload, event_type)
    else
      RailsPipeline.logger.info "No handler set, dropping message #{payload.class.name}"
      nil
    end
  when Proc
    target.call(payload)
  end
end
98
+
99
# Resolve the message class used to parse a payload of type
# +payload_class_name+ (a String such as "User_1_0").
#
# Returns the class with exactly that name when it is registered, otherwise
# the first registered class sharing the same name and major version, or nil
# when nothing compatible is registered.
def registered_class_on_same_major_version(payload_class_name)
  # Compare by name: the incoming type is a String, so it is matched against
  # the registry keys converted with to_s.
  registered_names = RailsPipeline::Subscriber.registered_handlers.keys.map(&:to_s)

  if registered_names.include?(payload_class_name)
    # The version we've been given has been registered. Return that.
    return Object.const_get(payload_class_name)
  end

  # Lops off the minor version from the payload class name so
  # we can find the registered message type with the same major version.
  class_name_with_major_version = payload_class_name.split("_", 3)[0, 2].join("_")

  # Look for message types with the same major version. The prefix must be
  # followed by "_" (or be the whole name) so that "User_1" does not match
  # "User_10_0".
  compatible_name = registered_names.find do |class_name|
    class_name == class_name_with_major_version ||
      class_name.start_with?("#{class_name_with_major_version}_")
  end

  # No message types with the same major version.
  return nil if compatible_name.nil?

  # There's a message type with the same major version.
  Object.const_get(compatible_name)
end
126
+
127
# Returns the name of the from_pipeline_<major>_<minor> method on
# +receiver_class+ with the highest minor version that does not exceed
# version.minor, or nil when there is none.
#
# Results are memoized in self.class.handler_method_cache.
# NOTE(review): the cache is keyed by version only, so a hit is returned
# without consulting +receiver_class+ -- confirm that is intended when
# several target classes share a version.
def most_suitable_handler_method_name(version, receiver_class)
  cached_method = self.class.handler_method_cache[version]
  return cached_method if cached_method

  # Anchor the major version so that major 1 does not also match
  # from_pipeline_10_*.
  major_pattern = /\Afrom_pipeline_#{version.major}(?:_|\z)/
  minor_of = ->(method_name) { method_name.to_s.split('_').last.to_i }

  # Compare minors numerically; a plain sort of the names would rank
  # "_1_9" above "_1_10".
  best_method = receiver_class.methods
    .grep(major_pattern)
    .reject { |method_name| minor_of.call(method_name) > version.minor }
    .max_by { |method_name| minor_of.call(method_name) }

  # cache handler method for this version
  self.class.handler_method_cache[version] = best_method
  best_method
end
142
+
143
# Check the envelope's api key against the configured keys.
#
# Returns true when the key is one of #_api_keys.
# Raises NoApiKeyError when the envelope carries no key and
# WrongApiKeyError when the key is not in the configured list.
def verify_api_key(envelope)
  supplied_key = envelope.api_key
  raise NoApiKeyError.new unless supplied_key.present?
  raise WrongApiKeyError.new unless _api_keys.include?(supplied_key)

  true
end
154
+
155
# Target registered with RailsPipeline::Subscriber for the payload's class.
def target_class(payload)
  payload_class = payload.class
  RailsPipeline::Subscriber.target_class(payload_class)
end
158
+
159
# Handler registered with RailsPipeline::Subscriber for the payload's class.
def target_handler(payload)
  payload_class = payload.class
  RailsPipeline::Subscriber.target_handler(payload_class)
end
162
+
163
# Build a PipelineVersion from the payload's class name: everything after
# the first underscore is passed to the constructor (nil when the name has
# no underscore).
def _version(payload)
  version_part = payload.class.name.split('_', 2)[1]
  RailsPipeline::PipelineVersion.new(version_part)
end
167
+
168
# Accepted api keys: the comma-separated PIPELINE_API_KEYS environment
# variable, or an empty list when it is not set.
def _api_keys
  configured = ENV['PIPELINE_API_KEYS'] || ""
  configured.split(',')
end
171
+
172
+ end
173
+
174
# Class-level state for including classes: a cache hash stored on the
# class itself, with a writer to replace it.
module ClassMethods

  # Replace the cache wholesale.
  attr_writer :handler_method_cache

  # The cache hash, created empty on first access.
  def handler_method_cache
    @handler_method_cache || (@handler_method_cache = {})
  end
end
184
+ end
185
+ end