fluent-plugin-kinesis-aggregation 0.1.1 → 0.2.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5337708f7ca323b6ed1e9c47809a6af9f489b885
4
- data.tar.gz: 0ed78adebd0c6d2136ccafd2cebc287ae88d86ed
3
+ metadata.gz: ce0b6aadd6a6326dfe20ac3db8c472eee51b3a51
4
+ data.tar.gz: b0a194b9450d3c16ea4ac8c9d8bae05bb68fdc8f
5
5
  SHA512:
6
- metadata.gz: 63145534371f0eb39b3230a77218536d65a0fdbc34ca60f50b72f6377a292c5cbe26ece61399e2eb4d3c429e8193d51d5de2ba850845927b92ffcdb5ab084ea4
7
- data.tar.gz: 26c464ef240db8b0801345ca572055995d327a5200b799081b9bb8a3eaaa91255a8adcd59d0f62d8252e055328ef37535a6182dc026dbb07042a793b0d829ee8
6
+ metadata.gz: b3ee690dc4ac1075c3e484d268c83c73f48f61b0d87f93b90c1884a6ae727f37cc2084710f287284bd59c66cd654a756c1a2f13f186c3c96aa9f4c4ad4b117f8
7
+ data.tar.gz: 242726a535b0387042718b4ae8b73def2e65cd4dc0d90aaa80d6a986e7a26c120362b38dc5f69c831357db92df366d9a9d78610134002dffd7b494f690da8f4f
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## 0.2.0
4
+
5
+ - switch to google protobuf library (ruby native one uses too much memory)
6
+
3
7
  ## 0.1.1
4
8
 
5
9
  - fix up conflict with fluent-kinesis plugin
@@ -17,7 +17,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
17
17
 
18
18
  Gem::Specification.new do |spec|
19
19
  spec.name = "fluent-plugin-kinesis-aggregation"
20
- spec.version = '0.1.1'
20
+ spec.version = '0.2.0'
21
21
  spec.author = 'Someone'
22
22
  spec.summary = %q{Fluentd output plugin that sends KPL style aggregated events to Amazon Kinesis.}
23
23
  spec.homepage = "https://github.com/atlassian/fluent-plugin-kinesis-aggregation"
@@ -36,5 +36,5 @@ Gem::Specification.new do |spec|
36
36
  spec.add_dependency "fluentd", ">= 0.10.53", "< 0.13"
37
37
  spec.add_dependency "aws-sdk-core", ">= 2.0.12", "< 3.0"
38
38
  spec.add_dependency "msgpack", ">= 0.5.8"
39
- spec.add_dependency "protobuf", ">= 3.5.5"
39
+ spec.add_dependency "google-protobuf", ">= 3.0.0.alpha.4.0"
40
40
  end
@@ -0,0 +1,19 @@
1
+ syntax = "proto3";
2
+
3
+ message AggregatedRecord {
4
+ repeated string partition_key_table = 1;
5
+ repeated string explicit_hash_key_table = 2;
6
+ repeated Record records = 3;
7
+ }
8
+
9
+ message Tag {
10
+ string key = 1;
11
+ string value = 2;
12
+ }
13
+
14
+ message Record {
15
+ uint64 partition_key_index = 1;
16
+ uint64 explicit_hash_key_index = 2;
17
+ bytes data = 3;
18
+ repeated Tag tags = 4;
19
+ }
@@ -17,32 +17,29 @@ require 'logger'
17
17
  require 'securerandom'
18
18
  require 'digest'
19
19
 
20
- require 'protobuf'
21
- require 'protobuf/message'
20
+ require 'google/protobuf'
22
21
 
23
-
24
- # https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md
25
- class AggregatedRecord < ::Protobuf::Message; end
26
- class Tag < ::Protobuf::Message; end
27
- class Record < ::Protobuf::Message; end
28
-
29
- class AggregatedRecord
30
- repeated :string, :partition_key_table, 1
31
- repeated :string, :explicit_hash_key_table, 2
32
- repeated ::Record, :records, 3
33
- end
34
-
35
- class Tag
36
- required :string, :key, 1
37
- optional :string, :value, 2
22
+ Google::Protobuf::DescriptorPool.generated_pool.build do
23
+ add_message "AggregatedRecord" do
24
+ repeated :partition_key_table, :string, 1
25
+ repeated :explicit_hash_key_table, :string, 2
26
+ repeated :records, :message, 3, "Record"
27
+ end
28
+ add_message "Tag" do
29
+ optional :key, :string, 1
30
+ optional :value, :string, 2
31
+ end
32
+ add_message "Record" do
33
+ optional :partition_key_index, :uint64, 1
34
+ optional :explicit_hash_key_index, :uint64, 2
35
+ optional :data, :bytes, 3
36
+ repeated :tags, :message, 4, "Tag"
37
+ end
38
38
  end
39
39
 
40
- class Record
41
- required :uint64, :partition_key_index, 1
42
- optional :uint64, :explicit_hash_key_index, 2
43
- required :bytes, :data, 3
44
- repeated ::Tag, :tags, 4
45
- end
40
+ AggregatedRecord = Google::Protobuf::DescriptorPool.generated_pool.lookup("AggregatedRecord").msgclass
41
+ Tag = Google::Protobuf::DescriptorPool.generated_pool.lookup("Tag").msgclass
42
+ Record = Google::Protobuf::DescriptorPool.generated_pool.lookup("Record").msgclass
46
43
 
47
44
 
48
45
  module FluentPluginKinesisAggregation
@@ -104,12 +101,12 @@ module FluentPluginKinesisAggregation
104
101
  end
105
102
 
106
103
  def format(tag, time, record)
107
- return AggregatedRecord.new(
104
+ return AggregatedRecord.encode(AggregatedRecord.new(
108
105
  records: [Record.new(
109
- partition_key_index: 0,
110
- data: Yajl.dump(record)
106
+ partition_key_index: 1,
107
+ data: Yajl.dump(record).b
111
108
  )]
112
- ).encode
109
+ ))
113
110
  end
114
111
 
115
112
  def write(chunk)
@@ -125,9 +122,17 @@ module FluentPluginKinesisAggregation
125
122
  # it's valid (in this case) to concatenate the AggregatedRecords
126
123
  # to form one AggregatedRecord, since we only have a repeated field
127
124
  # in records.
128
- message = AggregatedRecord.new(
129
- partition_key_table: [partition_key]
130
- ).encode + records
125
+ #
126
+ # ALSO, since we use google's protobuf stuff (much better
127
+ # memory usage due to C extension), we're stuck on proto3.
128
+ # Unfortunately, KPL uses proto2 form, and partition_key_index
129
+ # is a required field. If we set it to 0 in proto3, though,
130
+ # it's helpfully ignored in the serialisation (default!).
131
+ # Therefore we have to pass a partition_key_index of 1,
132
+ # and put two things in our partition_key_table.
133
+ message = AggregatedRecord.encode(AggregatedRecord.new(
134
+ partition_key_table: ['a', partition_key]
135
+ )) + records
131
136
 
132
137
  @client.put_record(
133
138
  stream_name: @stream_name,
@@ -163,22 +163,20 @@ class KinesisOutputTest < Test::Unit::TestCase
163
163
  d.run
164
164
  end
165
165
 
166
- def test_format
166
+ def test_emitting
167
167
  d = create_driver
168
168
 
169
- data1 = {"test_partition_key"=>"key1","a"=>1,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
170
- data2 = {"test_partition_key"=>"key2","a"=>2,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
169
+ data1 = {"a"=>1,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
170
+ data2 = {"a"=>2,"time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
171
171
 
172
172
  time = Time.parse("2011-01-02 13:14:15 UTC").to_i
173
173
  d.emit(data1, time)
174
174
  d.emit(data2, time)
175
- d.expect_format("\u001AR\b\u0000\u001AN{\"test_partition_key\":\"key1\",\"a\":1,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}")
176
- d.expect_format("\u001AR\b\u0000\u001AN{\"test_partition_key\":\"key2\",\"a\":2,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}")
177
175
 
178
176
  client = create_mock_client
179
177
  client.put_record(
180
178
  stream_name: 'test_stream',
181
- data: "\xF3\x89\x9A\xC2\n\x12test_partition_key\x1AR\b\x00\x1AN{\"test_partition_key\":\"key1\",\"a\":1,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\x1AR\b\x00\x1AN{\"test_partition_key\":\"key2\",\"a\":2,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\xB6j\x1E\xF7q\xC9}v\vU\xAD\xA3@<\x82\xA9".force_encoding("ASCII-8BIT"),
179
+ data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1A6\b\x01\x1A2{\"a\":1,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\x1A6\b\x01\x1A2{\"a\":2,\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\xA2\x0E y\x8B\x02\xDF\xAE\xAB\x93\x1C;\xCB\xAD\x1Fx".b,
182
180
  partition_key: 'test_partition_key'
183
181
  ) { {} }
184
182
 
@@ -188,20 +186,15 @@ class KinesisOutputTest < Test::Unit::TestCase
188
186
  def test_multibyte
189
187
  d = create_driver
190
188
 
191
- data1 = {"test_partition_key"=>"key1","a"=>"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB","time"=>"2011-01-02T13:14:15Z","tag"=>"test"}
192
- data1["a"].force_encoding("ASCII-8BIT")
189
+ data1 = {"a"=>"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB","time"=>"2011-01-02T13:14:15Z".b,"tag"=>"test"}
193
190
 
194
191
  time = Time.parse("2011-01-02 13:14:15 UTC").to_i
195
192
  d.emit(data1, time)
196
193
 
197
- d.expect_format(
198
- "\x1Ae\b\x00\x1Aa{\"test_partition_key\":\"key1\",\"a\":\"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB\",\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}".force_encoding("ASCII-8BIT")
199
- )
200
-
201
194
  client = create_mock_client
202
195
  client.put_record(
203
196
  stream_name: 'test_stream',
204
- data: "\xF3\x89\x9A\xC2\n\x12test_partition_key\x1Ae\b\x00\x1Aa{\"test_partition_key\":\"key1\",\"a\":\"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB\",\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}\xC8\x13{\xFBL_\x8FE\x02\xEEC\xC9_~\xEF(".force_encoding("ASCII-8BIT"),
197
+ data: "\xF3\x89\x9A\xC2\n\x01a\n\x12test_partition_key\x1AI\b\x01\x1AE{\"a\":\"\xE3\x82\xA4\xE3\x83\xB3\xE3\x82\xB9\xE3\x83\x88\xE3\x83\xBC\xE3\x83\xAB\",\"time\":\"2011-01-02T13:14:15Z\",\"tag\":\"test\"}_$\x9C\xF9v+pV:g7c\xE3\xF2$\xBA".b,
205
198
  partition_key: 'test_partition_key'
206
199
  ) { {} }
207
200
 
@@ -212,7 +205,7 @@ class KinesisOutputTest < Test::Unit::TestCase
212
205
  d = create_driver
213
206
 
214
207
  d.emit(
215
- {"msg": "z" * 1024 * 1024},
208
+ {"msg" => "z" * 1024 * 1024},
216
209
  Time.parse("2011-01-02 13:14:15 UTC").to_i)
217
210
  client = dont_allow(Object.new)
218
211
  client.put_record
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kinesis-aggregation
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Someone
@@ -107,19 +107,19 @@ dependencies:
107
107
  - !ruby/object:Gem::Version
108
108
  version: 0.5.8
109
109
  - !ruby/object:Gem::Dependency
110
- name: protobuf
110
+ name: google-protobuf
111
111
  requirement: !ruby/object:Gem::Requirement
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
- version: 3.5.5
115
+ version: 3.0.0.alpha.4.0
116
116
  type: :runtime
117
117
  prerelease: false
118
118
  version_requirements: !ruby/object:Gem::Requirement
119
119
  requirements:
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 3.5.5
122
+ version: 3.0.0.alpha.4.0
123
123
  description:
124
124
  email:
125
125
  executables: []
@@ -136,6 +136,7 @@ files:
136
136
  - README.md
137
137
  - Rakefile
138
138
  - fluent-plugin-kinesis-aggregation.gemspec
139
+ - kpl_aggregated_records.proto
139
140
  - lib/fluent/plugin/out_kinesis-aggregation.rb
140
141
  - test/helper.rb
141
142
  - test/plugin/test_out_kinesis-aggregation.rb