rails-pipeline 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +227 -0
  4. data/Rakefile +27 -0
  5. data/bin/pipeline +138 -0
  6. data/bin/redis-to-ironmq.rb +20 -0
  7. data/lib/rails-pipeline.rb +34 -0
  8. data/lib/rails-pipeline/emitter.rb +121 -0
  9. data/lib/rails-pipeline/handlers/activerecord_crud.rb +35 -0
  10. data/lib/rails-pipeline/handlers/base_handler.rb +19 -0
  11. data/lib/rails-pipeline/handlers/logger.rb +13 -0
  12. data/lib/rails-pipeline/ironmq_publisher.rb +37 -0
  13. data/lib/rails-pipeline/ironmq_pulling_subscriber.rb +96 -0
  14. data/lib/rails-pipeline/ironmq_subscriber.rb +21 -0
  15. data/lib/rails-pipeline/pipeline_version.rb +40 -0
  16. data/lib/rails-pipeline/protobuf/encrypted_message.pb.rb +37 -0
  17. data/lib/rails-pipeline/protobuf/encrypted_message.proto +18 -0
  18. data/lib/rails-pipeline/redis_forwarder.rb +207 -0
  19. data/lib/rails-pipeline/redis_ironmq_forwarder.rb +12 -0
  20. data/lib/rails-pipeline/redis_publisher.rb +71 -0
  21. data/lib/rails-pipeline/sns_publisher.rb +62 -0
  22. data/lib/rails-pipeline/subscriber.rb +185 -0
  23. data/lib/rails-pipeline/symmetric_encryptor.rb +127 -0
  24. data/lib/rails-pipeline/version.rb +3 -0
  25. data/lib/tasks/rails-pipeline_tasks.rake +4 -0
  26. data/spec/emitter_spec.rb +141 -0
  27. data/spec/handlers/activerecord_crud_spec.rb +100 -0
  28. data/spec/handlers/logger_spec.rb +42 -0
  29. data/spec/ironmp_pulling_subscriber_spec.rb +98 -0
  30. data/spec/ironmq_publisher_spec.rb +37 -0
  31. data/spec/pipeline_version_spec.rb +35 -0
  32. data/spec/redis_forwarder_spec.rb +99 -0
  33. data/spec/redis_publisher_spec.rb +36 -0
  34. data/spec/sns_publisher_spec.rb +28 -0
  35. data/spec/subscriber_spec.rb +278 -0
  36. data/spec/symmetric_encryptor_spec.rb +21 -0
  37. metadata +175 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3baf27587029e1f4d0c6005dabfd09b8ff8efd68
4
+ data.tar.gz: de357cd65ca250bae2470b69e4f3d935b407fc60
5
+ SHA512:
6
+ metadata.gz: c0741b47a72c591d3210004e5f35f71fa4efc209de96b0dfac777a18e05cbe97a6f6320bc6885af60daa476bc59e913ab1d36859699ebf233401b13b16efaa72
7
+ data.tar.gz: e5614fa21bc21ada92437787a8ac22e2fc6e1534c1c010bd3de1614cde42fc00acada91a5141903315e00d7eb05e1570dcb8b3c142fe0b3f7344cccc096137eb
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2014 Andy O'Neill, ADKM Inc
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,227 @@
1
+ # RailsPipeline
2
+
3
+ Emit a versioned stream of changes to a pub/sub queue when ActiveRecord models are
4
+ saved. This gem supports Redis, AWS (SNS/SQS), and IronMQ publishing targets.
5
+ The Redis backend supports a forwarding system to a cloud MQ like IronMQ.
6
+ Messages are encrypted in transit using AES symmetric encryption and a shared
7
+ secret.
8
+
9
+ ## Motivation
10
+
11
+ Many systems evolve into an inter-related collection of Rails applications as
12
+ they grow out of their initial monolithic design. This is often coincident with
13
+ independent teams taking responsibility for different aspects of the business
14
+ and different applications. In our case this has manifested itself as
15
+ one team that is responsible for the ecommerce platform (web, mobile etc) and
16
+ another team that works on data warehousing and personalization. The data team
17
+ was using a read-only replica of the platform team's database for data
18
+ warehousing, which tightly coupled us together when the platform team wanted to
19
+ make schema changes for performance or development velocity reasons.
20
+
21
+ This project allows us to offer a versioned API that we can maintain backwards
22
+ compatibility for while changing the underlying code and schema as we see fit.
23
+ Consumers of data will upgrade to new schema versions as they need to and when
24
+ they are able.
25
+
26
+ ## Usage
27
+
28
+ Install with bundler
29
+
30
+ gem 'rails-pipeline'
31
+
32
+ You should create a private repository containing your protocolbuffers schemas
33
+ and also depend on that, or use `git subtree` to bring it into your Rails apps.
34
+
35
+ For any models whose changes you wish to publish, just include the appropriate
36
+ pipeline emitter
37
+
38
+ include RailsPipeline::RedisEmitter
39
+
40
+ Each queue backend has different methods of consuming messages as a subscriber,
41
+ but for IronMQ there is an implementation of a webhook subscriber (details
42
+ below).
43
+
44
+ The following environment variables control pipeline operations
45
+
46
+ # If set, do not emit or process incoming messages
47
+ DISABLE_RAILS_PIPELINE
48
+
49
+ # If set, do not emit pipeline messages
50
+ DISABLE_RAILS_PIPELINE_EMISSION
51
+
52
+ # If set, do not process incoming messages as a subscriber, just drop them
53
+ DISABLE_RAILS_PIPELINE_PROCESSING
54
+
55
+ The following environment variable sets the shared secret for pipeline encryption
56
+
57
+ PIPELINE_SECRET
58
+
59
+ You can pass in a logger for RailsPipeline in an initializer e.g.
60
+
61
+ RailsPipeline.logger = Rails.logger
62
+ RailsPipeline.logger.level = Log4r::DEBUG
63
+
64
+ ## Backends
65
+
66
+ ### Redis
67
+
68
+ <table>
69
+ <tr><td>Redis Emitter</td><td>Only as a forwarding intermediary</td></tr>
70
+ <tr><td>Redis Subscriber</td><td>Only forwarder</td></tr>
71
+ </table>
72
+
73
+ The implementation for Redis assumes you want to use it as a local forwarding
74
+ queue to a more scalable service such as AWS or IronMQ. Thus all messages are
75
+ pushed onto a single Redis queue and include the name of the target topic/queue.
76
+ We have included a bouncer process that will read from the Redis queue (in
77
+ parallel if need be) and forward on to IronMQ. Adding an AWS forwarder would be
78
+ trivial.
79
+
80
+ It may be desirable to write a full pub/sub emitter for Redis (rather than just
81
+ a forwarder.)
82
+
83
+ #### Redis Config
84
+
85
+ The following environment variables are checked for Redis urls (default:
86
+ localhost:6379):
87
+
88
+ REDISCLOUD_URL
89
+ REDISTOGO_URL
90
+
91
+ Alternatively you could pass in an instance of the Redis client in an
92
+ initializer:
93
+
94
+ RailsPipeline::RedisPublisher.redis = MyRedisFactory.get
95
+
96
+ The key name of the redis queue should be set in an initializer, e.g.
97
+
98
+ RailsPipeline::RedisPublisher.namespace = "rails-pipeline-spec"
99
+
100
+ ### IronMQ
101
+
102
+ <table>
103
+ <tr><td>IronMQ Emitter</td><td>YES</td></tr>
104
+ <tr><td>Redis-to-IronMQ Forwarder</td><td>YES</td></tr>
105
+ <tr><td>IronMQ Subscriber</td><td>YES</td></tr>
106
+ </table>
107
+
108
+ #### Emitter
109
+
110
+ Each model/version emits ProtocolBuffer messages to a specific IronMQ queue. If
111
+ that queue is set as a "push" queue, subscribers can then add themselves as http
112
+ webhook endpoints for the push queue and messages will be delivered to them.
113
+
114
+
115
+ #### Subscriber
116
+
117
+ There is a subscriber implementation for IronMQ as an http endpoint.
118
+
119
+ Mount the Sinatra endpoint in your app's routes.rb
120
+
121
+ match "/ironmq" => RailsPipeline::IronmqSubscriber, :anchor => false
122
+
123
+ Register your own models as recipients of different pipeline message types and
124
+ versions (in a Rails initializer):
125
+
126
+ RailsPipeline::Subscriber.register(SomeModel_2_0, MyModel)
127
+
128
+ You will need to write a `MyModel#from_pipeline_2_0()` method. You can also
129
+ register any Proc as a processor for messages.
130
+
131
+ Add your URL as a subscriber to the push queues you care about using the
132
+ supplied 'pipeline' command
133
+
134
+ pipeline ironmq-subscribe-endpoint http://my.domain.com/ironmq some_models
135
+
136
+ You may find [ngrok](http://ngrok.com) helpful for developing and debugging.
137
+
138
+ #### Iron.io Config
139
+
140
+ The 'iron_mq' gem picks up the following environment variables
141
+
142
+ IRON_PROJECT_ID
143
+ IRON_TOKEN
144
+
145
+ ### AWS (Simple Notification Service)
146
+
147
+ <table>
148
+ <tr><td>SNS Emitter</td><td>YES</td></tr>
149
+ <tr><td>Redis-to-SNS Forwarder</td><td>NO, but easy to add.</td></tr>
150
+ <tr><td>SQS Polling Subscriber</td><td>NO</td></tr>
151
+ <tr><td>SNS Webhook Subscriber</td><td>NO</td></tr>
152
+ </table>
153
+
154
+ We include a proof-of-concept AWS emitter, written with the idea in mind to use
155
+ SQS as pub/sub queues and polling subscribers. It would also be possible to
156
+ publish to SNS and have multiple subscribers receive http webhook messages as in
157
+ IronMQ.
158
+
159
+ There are some commands in the 'pipeline' script to configure SNS/SQS:
160
+
161
+
162
+ Create SNS topics to publish to:
163
+
164
+ pipeline sns-create-topic TABLE_NAME --env ENV --version VERSION
165
+
166
+ Create an SQS queue and subscribe it to a TOPIC (one per subscribing rails
167
+ app)
168
+
169
+ pipeline sqs-subscribe-app APP TABLE_NAME[,TABLE_NAME_2,...] --env ENV --version VERSION
170
+
171
+ #### AWS Config
172
+
173
+ The AWS gem picks up the following environment variables
174
+
175
+ AWS_ACCESS_KEY_ID
176
+ AWS_SECRET_ACCESS_KEY
177
+
178
+ In addition, we use the numerical 'owner id' for your account which should be
179
+ set as
180
+
181
+ AWS_ACCOUNT_ID
182
+
183
+
184
+ ## Protocol Buffers
185
+
186
+ To build the test protocol buffers ruby files in rails-pipeline:
187
+
188
+ brew install protobuf
189
+ make
190
+
191
+ We have created a private repository gem for our protocol buffers definitions.
192
+ This is laid out like
193
+
194
+ harrys-pipeline/lib/harrys/pipeline/my_model_1_1.proto
195
+
196
+ Proto files look like this
197
+
198
+ <pre>
199
+ package Harrys.Pipeline;
200
+
201
+ message Order__1__0 {
202
+ required int32 id = 1;
203
+ required double created_at = 2;
204
+ required double updated_at = 3;
205
+ ...
206
+ }
207
+ </pre>
208
+
209
+ We then have a Makefile almost identical to the one in this gem to build our
210
+ .pb.rb files:
211
+
212
+ <pre>
213
+ GENDIR=./lib/harrys/pipeline
214
+ RUBY_PROTOC=bundle exec ruby-protoc
215
+ PROTOS=$(wildcard $(GENDIR)/*.proto)
216
+ PBS=$(PROTOS:%.proto=%.pb.rb)
217
+
218
+ all: $(PBS)
219
+
220
+ %.pb.rb: %.proto
221
+ $(RUBY_PROTOC) $<
222
+
223
+ clean:
224
+ rm -f $(PBS)
225
+ </pre>
226
+
227
+ [![TravisCI](https://travis-ci.org/harrystech/rails-pipeline.png)](https://travis-ci.org/harrystech/rails-pipeline)
data/Rakefile ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env rake
2
+ begin
3
+ require 'bundler/setup'
4
+ rescue LoadError
5
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
6
+ end
7
+ begin
8
+ require 'rdoc/task'
9
+ rescue LoadError
10
+ require 'rdoc/rdoc'
11
+ require 'rake/rdoctask'
12
+ RDoc::Task = Rake::RDocTask
13
+ end
14
+
15
+ RDoc::Task.new(:rdoc) do |rdoc|
16
+ rdoc.rdoc_dir = 'rdoc'
17
+ rdoc.title = 'RailsPipeline'
18
+ rdoc.options << '--line-numbers'
19
+ rdoc.rdoc_files.include('README.rdoc')
20
+ rdoc.rdoc_files.include('lib/**/*.rb')
21
+ end
22
+
23
+ APP_RAKEFILE = File.expand_path("../spec/dummy/Rakefile", __FILE__)
24
+ load 'rails/tasks/engine.rake'
25
+
26
+ Bundler::GemHelper.install_tasks
27
+
data/bin/pipeline ADDED
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # command line tool for administering data pipeline queues
4
+ #
5
+ # To create queues for an app following certain models:
6
+ # pipeline sqs-subscribe-app APP_NAME orders,products,users
7
+
8
+ require 'thor'
9
+ require 'aws-sdk'
10
+ require 'iron_mq'
11
+ require 'pry'
12
+
13
# Command line tool (built on Thor) for administering the data pipeline
# queues: SNS topics and SQS queues on AWS, and push queues on IronMQ.
class Pipeline < Thor
  # TODO: Kinda clunky, but there is a way to get the account ID programmatically:
  # https://forums.aws.amazon.com/thread.jspa?threadID=108012
  AWS_ACCOUNT_ID = ENV["AWS_ACCOUNT_ID"]

  # Thor derives command names from method names, so the advertised
  # "sns-create-topic" would not resolve to #create_sns_topic on its own.
  # Map it explicitly; the method-derived name keeps working as before.
  map "sns-create-topic" => :create_sns_topic

  def initialize(args = [], local_options = {}, config = {})
    super(args, local_options, config)
    @sns = AWS::SNS.new
    @sqs = AWS::SQS.new
    @iam = AWS::IAM.new
  end

  desc "sns-create-topic TABLE_NAME --env ENV --version VERSION", "create topic for TABLE_NAME"
  option :env
  option :version
  # Finds (or creates) the SNS topic for TABLE_NAME and returns it.
  def create_sns_topic(table_name)
    _find_or_create_topic(_topic_name(table_name, _env_option, _version_option))
  end

  desc "sqs-subscribe-app APP TABLE_NAME[,TABLE_NAME_2,...] --env ENV --version VERSION", "create queue(s) for APP subscribed to TABLE_NAME feeds"
  option :env
  option :version
  # For every comma-separated table name: find or create the app's SQS queue
  # and the table's SNS topic, then subscribe the queue to the topic.
  def sqs_subscribe_app(app_name, table_names)
    env = _env_option
    version = _version_option

    table_names.split(',').each do |table_name|
      topic_name = _topic_name(table_name, env, version)
      queue = _find_or_create_queue(_queue_name(topic_name, app_name))
      topic = _find_or_create_topic(topic_name)
      _subscribe_queue_to_topic(queue, topic)
    end
  end

  # Placeholders for IronMQ functions
  desc "ironmq-subscribe-endpoint ENDPOINT TABLE_NAME_1[,TABLE_NAME_2,...] --env ENV --version VERSION", "subscribe the endpoint to updates on ironmq"
  option :env
  option :version
  # For every comma-separated table name: apply the push-queue settings to
  # the IronMQ queue and add ENDPOINT as a subscriber URL.
  def ironmq_subscribe_endpoint(endpoint, table_names)
    env = _env_option
    version = _version_option

    table_names.split(',').each do |table_name|
      topic_name = _topic_name(table_name, env, version)
      queue = _iron.queue(topic_name)
      # Deliberately not called `options`, which would shadow Thor's #options.
      queue_settings = {
        push_type: "multicast",
        retries: 10,
        retries_delay: 10
      }
      puts "Setting push queue settings on #{queue.name}"
      queue.update(queue_settings)
      puts "Subscribe #{topic_name} to #{endpoint}"
      queue.add_subscriber({ url: endpoint })
    end
  end

  private

  # Value of --env, defaulting to "test". Looked up with a String key in
  # every command so the result does not depend on whether the installed
  # Thor supports Symbol keys in `fetch` on its options hash.
  def _env_option
    options.fetch("env", "test")
  end

  # Value of --version, defaulting to "1_0" (String key, see _env_option).
  def _version_option
    options.fetch("version", "1_0")
  end

  # Topic/queue base name: "harrys-ENV-vMAJOR-TABLE_NAME".
  def _topic_name(table_name, env, version)
    "harrys-#{env}-v#{_major_version(version)}-#{table_name}"
  end

  # Queue name for one subscribing app: "TOPIC_NAME-APP".
  def _queue_name(topic_name, app)
    "#{topic_name}-#{app}"
  end

  # Major part of an underscore-separated version ("1_0" => "1"); a version
  # without an underscore is returned unchanged.
  def _major_version(v)
    v.include?('_') ? v.split('_')[0] : v
  end

  # Returns the SNS topic named topic_name, creating it first when the
  # lookup raises NotFound.
  def _find_or_create_topic(topic_name)
    arn = _topic_arn(topic_name)
    topic = @sns.topics[arn]
    begin
      # Reading an attribute forces a request, which raises NotFound for a
      # topic that does not exist.
      topic.owner
      puts "Found topic #{arn}"
      topic
    rescue AWS::SNS::Errors::NotFound
      puts "Creating topic #{topic_name}"
      @sns.client.create_topic({name: topic_name})
      _find_or_create_topic(topic_name)
    end
  end

  # Returns the SQS queue named queue_name, creating it first when the
  # lookup raises NonExistentQueue.
  def _find_or_create_queue(queue_name)
    puts "Find or create queue #{queue_name}"
    begin
      @sqs.queues.named(queue_name)
    rescue AWS::SQS::Errors::NonExistentQueue
      # Log before acting so the message is printed even if creation fails.
      puts "Creating queue #{queue_name}"
      @sqs.queues.create(queue_name)
      _find_or_create_queue(queue_name)
    end
  end

  # Subscribes the SQS queue to the SNS topic and returns the subscription.
  def _subscribe_queue_to_topic(queue, topic)
    puts "Subscribing #{queue.arn} to #{topic.arn}"
    topic.subscribe(queue)
  end

  # ARN of an SNS topic owned by AWS_ACCOUNT_ID in the given region.
  def _topic_arn(topic_name, region="us-east-1")
    "arn:aws:sns:#{region}:#{AWS_ACCOUNT_ID}:#{topic_name}"
  end

  # ARN of an SQS queue owned by AWS_ACCOUNT_ID in the given region.
  # Queue ARNs live in the "sqs" service namespace, not "sns".
  def _queue_arn(queue_name, region="us-east-1")
    "arn:aws:sqs:#{region}:#{AWS_ACCOUNT_ID}:#{queue_name}"
  end

  # Lazily built, memoized IronMQ client.
  def _iron
    @iron ||= IronMQ::Client.new
  end
end

Pipeline.start
data/bin/redis-to-ironmq.rb ADDED
@@ -0,0 +1,20 @@
1
#!/usr/bin/env ruby

# Pipeline forwarder that reads from a redis queue and forwards to ironmq.
#
# Usage: redis-to-ironmq.rb [REDIS_KEY]
#   REDIS_KEY  name of the redis key to read from
#              (default: "harrys-www-pipeline")

require 'logger'
require 'rails-pipeline'
require 'rails-pipeline/redis_ironmq_forwarder'

# Log everything to stdout at DEBUG level under the 'pipeline' program name.
log = Logger.new(STDOUT)
log.progname = 'pipeline'
log.level = Logger::DEBUG
RailsPipeline.logger = log

# Redis location: first of REDISCLOUD_URL / REDISTOGO_URL that is set,
# otherwise the local default.
# NOTE(review): presumably read by the forwarder/publisher code, which is not
# visible from this script -- confirm before changing the global's name/type.
$redis = ENV["REDISCLOUD_URL"] || ENV["REDISTOGO_URL"] || "localhost:6379"

# The key used to be hard-coded; accept it as an optional first argument and
# keep the previous value as the default so existing invocations still work.
key = ARGV.first || "harrys-www-pipeline"

forwarder = RailsPipeline::RedisIronmqForwarder.new(key)
forwarder.start
data/lib/rails-pipeline.rb ADDED
@@ -0,0 +1,34 @@
1
+
2
require "logger"

# Top-level namespace of the gem. Holds the shared, configurable logger and
# the HAS_NEWRELIC flag telling whether the newrelic_rpm gem could be loaded.
module RailsPipeline
  class << self
    # Allow configuration via initializer, e.g.
    #   RailsPipeline.logger = Rails.logger
    attr_writer :logger

    # Returns the configured logger.
    #
    # When none has been set, falls back to Rails.logger if Rails is loaded;
    # outside Rails (e.g. standalone scripts) a stdout ::Logger is used
    # instead of raising NameError on the missing Rails constant.
    def logger
      return @logger if @logger

      # ::Logger is fully qualified so that a Logger constant defined inside
      # this namespace cannot be picked up by mistake.
      @logger = if defined?(::Rails) && ::Rails.respond_to?(:logger)
                  ::Rails.logger
                else
                  ::Logger.new($stdout)
                end
    end
  end

  # New Relic is optional: record whether it is available.
  begin
    require 'newrelic_rpm'
    HAS_NEWRELIC = true
  rescue LoadError
    HAS_NEWRELIC = false
  end
end
23
+
24
+ require "rails-pipeline/emitter"
25
+ require "rails-pipeline/subscriber"
26
+ require "rails-pipeline/symmetric_encryptor"
27
+ require "rails-pipeline/redis_publisher"
28
+ require "rails-pipeline/sns_publisher"
29
+ require "rails-pipeline/ironmq_publisher"
30
+ require "rails-pipeline/handlers/base_handler"
31
+ require "rails-pipeline/handlers/activerecord_crud"
32
+ require "rails-pipeline/handlers/logger"
33
+ require "rails-pipeline/pipeline_version"
34
+ require "rails-pipeline/ironmq_pulling_subscriber"