firehose_integration 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.rdoc +144 -1
- data/lib/firehose_integration/models/concerns/kinesis_event.rb +1 -1
- data/lib/firehose_integration/version.rb +1 -1
- data/test/dummy/app/models/dummy_model.rb +1 -1
- data/test/dummy/app/models/stupid_model.rb +1 -1
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +2389 -0
- data/test/models/concerns/kinesis_event_test.rb +8 -0
- data/test/test_helper.rb +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c5cb0ed37b314dbdc6f3cf1c4ace1f576a06885
|
4
|
+
data.tar.gz: 414b0fb0b90463232e45bb1dfc2391b52e9cea45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a87d42534fd683aaa5f1f3cf8708041d9e65d4015480123bcba6de84a506a374a101f9411263cfa81165778297f3d47c9a98498537c32aab79afe7d4aa5927ae
|
7
|
+
data.tar.gz: b6ebc256144c58320ea80da0dd136ff7c7a0b870b0477e8d8b2b06f355fe95c5390c776ab2c96d40d3ffcfa950d5e1ab4075386ff98019e10fe506ccf99db3f1
|
data/README.rdoc
CHANGED
@@ -1,3 +1,146 @@
|
|
1
1
|
= FirehoseIntegration
|
2
2
|
|
3
|
-
This
|
3
|
+
This gem will send data to Firehose every time your models are updated.
|
4
|
+
The general idea is to use a separate stream for each model you want to serialize.
|
5
|
+
This essentially simplifies data syncing with Amazon Redshift via Firehose.
|
6
|
+
|
7
|
+
= Basic Usage
|
8
|
+
|
9
|
+
Add the following to your Gemfile and bundle install
|
10
|
+
|
11
|
+
gem 'firehose-integration'
|
12
|
+
|
13
|
+
Create and configure config/initializers/aws.rb
|
14
|
+
|
15
|
+
require 'aws-sdk'
|
16
|
+
|
17
|
+
Aws.config.update({
|
18
|
+
region: 'us-east-1',
|
19
|
+
credentials: Aws::Credentials.new('akid', 'secret')
|
20
|
+
})
|
21
|
+
|
22
|
+
For each model you want to integrate create a kinesis serializer
|
23
|
+
|
24
|
+
# apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
|
25
|
+
module DummyModelKinesisSerializer
|
26
|
+
extend ActiveSupport::Concern
|
27
|
+
|
28
|
+
included do
|
29
|
+
def to_kinesis
|
30
|
+
prepare_for_redshift [
|
31
|
+
id,
|
32
|
+
updated_at,
|
33
|
+
created_at
|
34
|
+
]
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.kinesis_stream_name
|
38
|
+
'dummy-stream'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
Add the following line to the model you want to integrate.
|
44
|
+
|
45
|
+
# app/models/dummy_model.rb
|
46
|
+
class DummyModel < ActiveRecord::Base
|
47
|
+
firehose_integratable
|
48
|
+
end
|
49
|
+
|
50
|
+
You will need to make sure you have the streams you're using created in Amazon Firehose.
|
51
|
+
For the DummyModel example here we'd set up a 'dummy-stream' in Firehose.
|
52
|
+
|
53
|
+
= Advanced Usage
|
54
|
+
|
55
|
+
There are cases where simple 1-to-1 serialization from your existing tables won't work with Redshift.
|
56
|
+
One such case is if you use Postgres array datatypes. Redshift doesn't support that datatype so you will
|
57
|
+
need to normalize the data again when putting it in Redshift. To handle these edge cases you create a
|
58
|
+
'kinesis_extra_serialization' method which will be executed when your model is updated if present.
|
59
|
+
|
60
|
+
# apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
|
61
|
+
module DummyModelKinesisSerializer
|
62
|
+
extend ActiveSupport::Concern
|
63
|
+
|
64
|
+
included do
|
65
|
+
def to_kinesis
|
66
|
+
prepare_for_redshift [
|
67
|
+
id,
|
68
|
+
updated_at,
|
69
|
+
created_at
|
70
|
+
]
|
71
|
+
end
|
72
|
+
|
73
|
+
def self.kinesis_stream_name
|
74
|
+
'dummy-stream'
|
75
|
+
end
|
76
|
+
|
77
|
+
def kinesis_extra_serialization
|
78
|
+
self.book_ids.each do |book_id|
|
79
|
+
data = [id, book_id, created_at, updated_at].join("|")
|
80
|
+
FirehoseIntegration::KinesisJob.perform_later("dummy-books", data)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
Another common case you may encounter is the need to denormalize data into other tables. Let's assume
|
87
|
+
you have a dummy_models table that you want to sync, but you also have a denormalized_dummy_models table you need
|
88
|
+
to keep in sync when your model is updated.
|
89
|
+
|
90
|
+
# apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
|
91
|
+
module DummyModelKinesisSerializer
|
92
|
+
extend ActiveSupport::Concern
|
93
|
+
include DenormalizedDummyModelKinesisSerializer
|
94
|
+
|
95
|
+
included do
|
96
|
+
def to_kinesis
|
97
|
+
prepare_for_redshift [
|
98
|
+
id,
|
99
|
+
updated_at,
|
100
|
+
created_at
|
101
|
+
]
|
102
|
+
end
|
103
|
+
|
104
|
+
def self.kinesis_stream_name
|
105
|
+
'dummy-stream'
|
106
|
+
end
|
107
|
+
|
108
|
+
def kinesis_extra_serialization
|
109
|
+
serialize_denormalized_dummy_model(id)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
# apps/kinesis_serializers/denormalized_dummy_model_kinesis_serializer.rb
|
115
|
+
module DenormalizedDummyModelKinesisSerializer
|
116
|
+
extend ActiveSupport::Concern
|
117
|
+
|
118
|
+
included do
|
119
|
+
def denormalized_dummy_model_to_kinesis(dummy_id)
|
120
|
+
prepare_for_redshift [ id,
|
121
|
+
created_at,
|
122
|
+
updated_at
|
123
|
+
]
|
124
|
+
end
|
125
|
+
|
126
|
+
def self.denormalized_kinesis_stream_name
|
127
|
+
"denormalized-dummy-model"
|
128
|
+
end
|
129
|
+
|
130
|
+
def serialize_denormalized_dummy_model(dummy_id)
|
131
|
+
FirehoseIntegration::KinesisJob.perform_later(self.class.denormalized_kinesis_stream_name, self.denormalized_dummy_model_to_kinesis(dummy_id))
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
If you'd like to turn the syncing off for some reason, such as when running on a staging server, just set the following ENV variable to true:
|
137
|
+
|
138
|
+
SKIP_KINESIS_EVENTS=true
|
139
|
+
|
140
|
+
= Versions
|
141
|
+
|
142
|
+
0.0.2 - Adding ability to turn off job queueing with ENV variable
|
143
|
+
0.0.1 - Initial release
|
144
|
+
|
145
|
+
= License
|
146
|
+
This project rocks and uses MIT-LICENSE.
|
@@ -6,7 +6,7 @@ module FirehoseIntegration
|
|
6
6
|
|
7
7
|
module ClassMethods
|
8
8
|
def firehose_integratable
|
9
|
-
after_commit :send_kinesis_event, unless: Proc.new { |instance| instance.try(:skip_kinesis_event) }
|
9
|
+
after_commit :send_kinesis_event, unless: Proc.new { |instance| instance.try(:skip_kinesis_event) || ENV['SKIP_KINESIS_EVENTS'] == 'true' }
|
10
10
|
|
11
11
|
begin
|
12
12
|
include "#{self.model_name.name}KinesisSerializer".constantize
|
data/test/dummy/db/test.sqlite3
CHANGED
Binary file
|