firehose_integration 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +144 -1
- data/lib/firehose_integration/models/concerns/kinesis_event.rb +1 -1
- data/lib/firehose_integration/version.rb +1 -1
- data/test/dummy/app/models/dummy_model.rb +1 -1
- data/test/dummy/app/models/stupid_model.rb +1 -1
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/test.log +2389 -0
- data/test/models/concerns/kinesis_event_test.rb +8 -0
- data/test/test_helper.rb +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6c5cb0ed37b314dbdc6f3cf1c4ace1f576a06885
+  data.tar.gz: 414b0fb0b90463232e45bb1dfc2391b52e9cea45
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a87d42534fd683aaa5f1f3cf8708041d9e65d4015480123bcba6de84a506a374a101f9411263cfa81165778297f3d47c9a98498537c32aab79afe7d4aa5927ae
+  data.tar.gz: b6ebc256144c58320ea80da0dd136ff7c7a0b870b0477e8d8b2b06f355fe95c5390c776ab2c96d40d3ffcfa950d5e1ab4075386ff98019e10fe506ccf99db3f1
data/README.rdoc
CHANGED
@@ -1,3 +1,146 @@
 = FirehoseIntegration
 
-This
+This gem will send data to Firehose every time your models are updated.
+The general idea is to use a separate stream for each model you want to serialize.
+This essentially simplifies data syncing with Amazon Redshift via Firehose.
+
+= Basic Usage
+
+Add the following to your Gemfile and run bundle install:
+
+  gem 'firehose-integration'
+
+Create and configure config/initializers/aws.rb:
+
+  require 'aws-sdk'
+
+  Aws.config.update({
+    region: 'us-east-1',
+    credentials: Aws::Credentials.new('akid', 'secret')
+  })
+
+For each model you want to integrate, create a Kinesis serializer:
+
+  # apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
+  module DummyModelKinesisSerializer
+    extend ActiveSupport::Concern
+
+    included do
+      def to_kinesis
+        prepare_for_redshift [
+          id,
+          updated_at,
+          created_at
+        ]
+      end
+
+      def self.kinesis_stream_name
+        'dummy-stream'
+      end
+    end
+  end
+
+Add the following to the model you want to integrate:
+
+  # app/models/dummy_model.rb
+  class DummyModel < ActiveRecord::Base
+    firehose_integratable
+  end
+
+You will need to make sure the streams you're using have been created in Amazon Firehose.
+For the DummyModel example here we'd set up a 'dummy-stream' in Firehose.
+
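The two pieces above never meet explicitly in the README, so the following is a rough sketch of the flow to expect once firehose_integratable is wired up. The explicit perform_later call mirrors what the gem's after_commit callback is assumed to do; it is not code you write yourself, and the name column is made up for the example:

  # Hypothetical walkthrough, not gem code: saving a record fires the
  # after_commit callback, which hands the serialized row to a background job.
  dummy = DummyModel.create!(name: 'example')   # assumes a `name` column exists

  # Roughly what the callback is expected to enqueue behind the scenes:
  FirehoseIntegration::KinesisJob.perform_later(
    DummyModel.kinesis_stream_name,  # 'dummy-stream', from the serializer above
    dummy.to_kinesis                 # the prepare_for_redshift output for this row
  )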
+= Advanced Usage
+
+There are cases where a simple 1-to-1 serialization of your existing tables won't work with Redshift.
+One such case is if you use Postgres array datatypes. Redshift doesn't support that datatype, so you will
+need to normalize the data again when putting it into Redshift. To handle these edge cases, create a
+'kinesis_extra_serialization' method, which, if present, will be executed when your model is updated.
+
+  # apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
+  module DummyModelKinesisSerializer
+    extend ActiveSupport::Concern
+
+    included do
+      def to_kinesis
+        prepare_for_redshift [
+          id,
+          updated_at,
+          created_at
+        ]
+      end
+
+      def self.kinesis_stream_name
+        'dummy-stream'
+      end
+
+      def kinesis_extra_serialization
+        self.book_ids.each do |book_id|
+          data = [id, book_id, created_at, updated_at].join("|")
+          FirehoseIntegration::KinesisJob.perform_later("dummy-books", data)
+        end
+      end
+    end
+  end
+
+Another common case you may encounter is the need to denormalize data into other tables. Let's assume
+you have a dummy_models table that you want to keep as-is, but you also have a denormalized_dummy_models
+table you need to keep in sync when your model is updated.
+
+  # apps/kinesis_serializers/dummy_model_kinesis_serializer.rb
+  module DummyModelKinesisSerializer
+    extend ActiveSupport::Concern
+    include DenormalizedDummyModelKinesisSerializer
+
+    included do
+      def to_kinesis
+        prepare_for_redshift [
+          id,
+          updated_at,
+          created_at
+        ]
+      end
+
+      def self.kinesis_stream_name
+        'dummy-stream'
+      end
+
+      def kinesis_extra_serialization
+        serialize_denormalized_dummy_model(id)
+      end
+    end
+  end
+
+  # apps/kinesis_serializers/denormalized_dummy_model_kinesis_serializer.rb
+  module DenormalizedDummyModelKinesisSerializer
+    extend ActiveSupport::Concern
+
+    included do
+      def denormalized_dummy_model_to_kinesis(dummy_id)
+        prepare_for_redshift [ id,
+                               created_at,
+                               updated_at
+                             ]
+      end
+
+      def self.denormalized_kinesis_stream_name
+        "denormalized-dummy-model"
+      end
+
+      def serialize_denormalized_dummy_model(dummy_id)
+        FirehoseIntegration::KinesisJob.perform_later(self.class.denormalized_kinesis_stream_name, self.denormalized_dummy_model_to_kinesis(dummy_id))
+      end
+    end
+  end
+
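Assuming the callback behaves as described above, a single committed update then fans out to both streams. A hypothetical illustration:

  # One update is expected to enqueue a job for each stream.
  dummy = DummyModel.find(1)
  dummy.update!(updated_at: Time.current)
  # after_commit -> send_kinesis_event:
  #   a KinesisJob for 'dummy-stream' with dummy.to_kinesis
  #   a KinesisJob for 'denormalized-dummy-model' via kinesis_extra_serialization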
+If you'd like to turn syncing off, for example when running on a staging server, just set the following ENV variable to true:
+
+  SKIP_KINESIS_EVENTS=true
+
+= Versions
+
+0.0.2 - Added the ability to turn off job queueing with an ENV variable
+0.0.1 - Initial release
+
+= License
+This project rocks and uses MIT-LICENSE.
data/lib/firehose_integration/models/concerns/kinesis_event.rb
CHANGED
@@ -6,7 +6,7 @@ module FirehoseIntegration
 
   module ClassMethods
     def firehose_integratable
-      after_commit :send_kinesis_event, unless: Proc.new { |instance| instance.try(:skip_kinesis_event) }
+      after_commit :send_kinesis_event, unless: Proc.new { |instance| instance.try(:skip_kinesis_event) || ENV['SKIP_KINESIS_EVENTS'] == 'true' }
 
       begin
         include "#{self.model_name.name}KinesisSerializer".constantize
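The new guard layers the ENV switch on top of the existing per-instance escape hatch. Since the callback only calls instance.try(:skip_kinesis_event), any truthy reader on the model works; the attr_accessor below is an assumed convenience for illustration, not something the gem defines for you:

  class DummyModel < ActiveRecord::Base
    firehose_integratable

    # Hypothetical flag read by the after_commit guard via
    # instance.try(:skip_kinesis_event).
    attr_accessor :skip_kinesis_event
  end

  # Skip Firehose for a one-off backfill:
  record = DummyModel.new(skip_kinesis_event: true)
  record.save!   # after_commit fires, but send_kinesis_event is skipped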
data/test/dummy/db/test.sqlite3
CHANGED
Binary file