fluent-plugin-arrow 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c46290f5f4e9c75defd5494117f6deb504dad5fb70c51ceaccb5aabe9ff06934
4
+ data.tar.gz: 6bc16d3413997b32efe5233a3aea4e3f4a3020eaf70c10675d8d1eef5899f0a2
5
+ SHA512:
6
+ metadata.gz: 73e46c999a9a62710c1ead926565d18391cd377e61e6eb01bb76d6ca102cf8423ffd01c7934dfd6fb0c32282bf04867810854dd14b76ec79416597e6237aae76
7
+ data.tar.gz: 6c14dc63fa79ece11140fd1b2ee916bcf2917260b0ae9a5741024ea5923d18e3a66add447a7f8c3cf1d5d24fccb4c8cb651d40e609f353813f7673d39669cf84
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /gemfiles/.bundle
11
+ *.gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,202 @@
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # fluent-plugin-arrow
2
+
3
+ [Fluentd](https://fluentd.org/) buffer and formatter plugins that serialize events in Apache Arrow (or Parquet) format.
4
+
5
+ TODO: write description for your plugin.
6
+
7
+ ## Installation
8
+
9
+ ### RubyGems
10
+
11
+ ```
12
+ $ gem install fluent-plugin-arrow
13
+ ```
14
+
15
+ ### Bundler
16
+
17
+ Add the following line to your Gemfile:
18
+
19
+ ```ruby
20
+ gem "fluent-plugin-arrow"
21
+ ```
22
+
23
+ And then execute:
24
+
25
+ ```
26
+ $ bundle
27
+ ```
28
+
29
+ ## Configuration
30
+
31
+ You can generate a configuration template:
32
+
33
+ ```
34
+ $ fluent-plugin-config-format formatter arrow
35
+ ```
36
+
37
+ You can copy and paste the generated documentation here.
38
+
39
+ ## Copyright
40
+
41
+ * Copyright(c) 2018- joker1007
42
+ * License
43
+ * Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require "bundler"
2
+ Bundler::GemHelper.install_tasks
3
+
4
+ require "rake/testtask"
5
+
6
# `rake test`: runs every test/**/test_*.rb file with lib/ and test/ on the
# load path, in verbose mode and with Ruby warnings enabled.
Rake::TestTask.new(:test) do |test_task|
  %w[lib test].each { |dir| test_task.libs.push(dir) }
  test_task.test_files = FileList["test/**/test_*.rb"]
  test_task.verbose = true
  test_task.warning = true
end
12
+
13
+ task default: [:test]
@@ -0,0 +1,29 @@
1
+ lib = File.expand_path("../lib", __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+
4
# Gem specification for fluent-plugin-arrow.
Gem::Specification.new do |spec|
  spec.name    = "fluent-plugin-arrow"
  spec.version = "0.0.1"
  spec.authors = ["joker1007"]
  spec.email   = ["kakyoin.hierophant@gmail.com"]

  spec.summary     = "Apache Arrow formatter plugin for fluentd."
  spec.description = "Apache Arrow formatter plugin for fluentd."
  spec.homepage    = "https://github.com/joker1007/fluent-plugin-arrow"
  spec.license     = "Apache-2.0"

  # Everything tracked by git is packaged; paths under test/, spec/ or
  # features/ are declared as test files, the rest as regular files.
  tracked_paths = `git ls-files -z`.split("\x0")
  test_files, files = tracked_paths.partition do |path|
    path.match(%r{^(test|spec|features)/})
  end
  spec.files         = files
  spec.executables   = files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = test_files
  spec.require_paths = ["lib"]

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.14"
  spec.add_development_dependency "rake", "~> 12.0"
  spec.add_development_dependency "test-unit", "~> 3.0"

  # Runtime dependencies.
  spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
  spec.add_runtime_dependency "red-arrow", ">= 0.10"
  spec.add_runtime_dependency "red-parquet"
end
@@ -0,0 +1,73 @@
1
+ #
2
+ # Copyright 2018- joker1007
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ require "arrow"
17
+ require 'fluent/plugin/buffer'
18
+ require 'fluent/plugin/buffer/arrow_memory_chunk'
19
+
20
module Fluent
  module Plugin
    # Buffer plugin registered as "arrow_memory". It turns the user-declared
    # +schema+ into an Arrow::Schema at configure time and creates
    # ArrowMemoryChunk instances bound to that schema.
    class ArrowMemoryBuffer < Fluent::Plugin::Buffer
      Plugin.register_buffer('arrow_memory', self)

      # Array of field definitions, for example:
      #   [{"name" => "foo1", "type" => "uint64"},
      #    {"name" => "foo2", "type" => "struct",
      #     "fields" => [{"name" => "bar1", "type" => "string"}]}]
      config_param :schema, :array
      # Output encoding handed to each chunk.
      config_param :arrow_format, :enum, list: [:arrow, :parquet], default: :arrow
      # Handed to each chunk as its chunk_size.
      config_param :row_group_chunk_size, :integer, default: 1024

      # The Arrow::Schema built from +schema+ by #configure.
      attr_reader :arrow_schema

      # Builds @arrow_schema from the configured field definitions.
      # Raises Fluent::ConfigError when a field definition is incomplete or
      # names a type that has no matching Arrow data type class.
      def configure(conf)
        super

        arrow_fields = @schema.map do |field|
          create_arrow_field(field)
        end

        @arrow_schema = Arrow::Schema.new(arrow_fields)
      end

      # Always returns an empty stage ({}) and an empty queue ([]).
      def resume
        return {}, []
      end

      # Returns a new ArrowMemoryChunk for +metadata+ using the configured
      # schema, chunk size and output format.
      def generate_chunk(metadata)
        Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
      end

      private

      # Converts one field definition hash into an Arrow::Field.
      def create_arrow_field(field)
        Arrow::Field.new(field["name"], create_arrow_data_type(field))
      end

      # Maps a field definition to an Arrow data type instance.
      # "struct", "list" and "timestamp" need extra keys ("fields",
      # "value_type" and "unit" respectively); every other type name is
      # resolved to the Arrow::<Name>DataType class.
      def create_arrow_data_type(field)
        case field["type"]
        when "struct"
          children = fetch_required(field, "fields")
          Arrow::StructDataType.new(children.map { |f| create_arrow_field(f) })
        when "list"
          Arrow::ListDataType.new(create_arrow_field(fetch_required(field, "value_type")))
        when "timestamp"
          Arrow::TimestampDataType.new(fetch_required(field, "unit").to_sym)
        else
          create_primitive_data_type(field)
        end
      end

      # Resolves a parameterless type such as "uint64" or "string" to
      # Arrow::UInt64DataType.new / Arrow::StringDataType.new.
      def create_primitive_data_type(field)
        type_name = fetch_required(field, "type").to_s
        # "uint64" -> "Uint64" -> "UInt64", matching the Arrow class naming.
        data_type_name = type_name.capitalize.gsub(/\AUint/, "UInt")
        data_type_class_name = "#{data_type_name}DataType"
        begin
          data_type_class = Arrow.const_get(data_type_class_name)
        rescue NameError
          raise Fluent::ConfigError,
                "schema field #{field["name"].inspect} has unknown type #{type_name.inspect} (no Arrow::#{data_type_class_name})"
        end
        data_type_class.new
      end

      # Returns field[key], raising Fluent::ConfigError (instead of a later
      # NoMethodError on nil) when the key is absent or nil.
      def fetch_required(field, key)
        value = field[key]
        if value.nil?
          raise Fluent::ConfigError,
                "schema field #{field["name"].inspect} requires #{key.inspect}"
        end
        value
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ #
2
+ # Copyright 2018- joker1007
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ require 'arrow'
17
+ require 'parquet'
18
+ require 'fluent/plugin/buffer/chunk'
19
+ require 'fluent/plugin/buffer/memory_chunk'
20
+
21
module Fluent
  module Plugin
    class Buffer
      # Memory chunk that stores msgpack-encoded records like MemoryChunk
      # does, but re-encodes them as an Arrow file (or a Parquet file) whenever
      # the chunk content is read.
      class ArrowMemoryChunk < MemoryChunk
        # metadata   - chunk metadata, passed through to MemoryChunk.
        # schema     - Arrow::Schema describing the columns to emit.
        # chunk_size - passed to the Parquet writer's write_table.
        # format     - :parquet writes Parquet; any other value writes the
        #              Arrow record batch file format.
        def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
          super(metadata, compress: :text)
          @schema = schema
          @chunk_size = chunk_size
          @format = format
          # One array builder per schema field, keyed by field name.
          @array_builders = {}
          @schema.fields.each do |f|
            @array_builders[f.name] = field_to_array_builder(f)
          end
          @unpacker = Fluent::MessagePackFactory.engine_factory.unpacker
        end

        # Returns the encoded Arrow/Parquet bytes as a String.
        def read(**kwargs)
          build_arrow_buffer_string
        end

        # Yields a StringIO over the encoded bytes.
        def open(**kwargs, &block)
          StringIO.open(build_arrow_buffer_string, &block)
        end

        # Writes the encoded bytes to +io+.
        def write_to(io, **kwargs)
          # re-implementation to optimize not to create StringIO
          io.write build_arrow_buffer_string
        end

        private

        # Picks the Arrow array builder for a schema field. Timestamp builders
        # are constructed with the field's data type; for other types the
        # builder class is looked up by name (Arrow::<Name>ArrayBuilder) and
        # given the data type only when its constructor takes an argument.
        # NOTE(review): nested types such as list/struct are not special-cased
        # here, so their type string is unlikely to resolve to a builder class.
        def field_to_array_builder(f)
          data_type_str = f.data_type.to_s
          if data_type_str =~ /timestamp/
            return Arrow::TimestampArrayBuilder.new(f.data_type)
          end

          data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
          array_builder_class_name = "#{data_type_name}ArrayBuilder"
          array_builder_class = Arrow.const_get(array_builder_class_name)
          if array_builder_class.method(:new).arity > 0
            array_builder_class.new(f.data_type)
          else
            array_builder_class.new
          end
        end

        # Decodes every buffered record into the array builders and serializes
        # the resulting columns in the configured format.
        def build_arrow_buffer_string
          row_count = append_records_to_builders
          # The buffer size is a byte count, so pass an Integer, not a Float.
          arrow_buf = Arrow::ResizableBuffer.new((@chunk_bytes * 1.2).ceil)

          Arrow::BufferOutputStream.open(arrow_buf) do |output|
            if @format == :parquet
              Parquet::ArrowFileWriter.open(@schema, output) do |writer|
                columns = @schema.fields.map do |f|
                  Arrow::Column.new(f, @array_builders[f.name].finish)
                end
                table = Arrow::Table.new(@schema, columns)
                writer.write_table(table, @chunk_size)
              end
            else
              Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
                arrays = @schema.fields.map { |f| @array_builders[f.name].finish }
                record_batch = Arrow::RecordBatch.new(@schema, row_count, arrays)
                writer.write_record_batch(record_batch)
              end
            end
          end

          arrow_buf.data.to_s
        end

        # Feeds the raw msgpack chunk through the unpacker and appends exactly
        # one value per schema field for each record, so all columns stay the
        # same length. A key that is missing or nil becomes a null; keys that
        # are not part of the schema are ignored.
        # Returns the number of records decoded.
        def append_records_to_builders
          count = 0
          @unpacker.feed_each(@chunk) do |record|
            count += 1
            @array_builders.each do |name, builder|
              value = record[name]
              if value.nil?
                builder.append_null
              else
                builder.append(value)
              end
            end
          end
          count
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ #
2
+ # Copyright 2018- joker1007
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ require "fluent/plugin/formatter"
17
+
18
module Fluent
  module Plugin
    # Placeholder formatter registered under the name "arrow". It does no
    # Arrow encoding itself: each record is emitted as MessagePack, the same
    # output MsgpackFormatter produces.
    class ArrowFormatter < Fluent::Plugin::Formatter
      Plugin.register_formatter("arrow", self)

      # Declares the formatted output as binary.
      def formatter_type
        :binary
      end

      # Serializes +record+ to MessagePack; +tag+ and +time+ are not used.
      def format(tag, time, record)
        packed = record.to_msgpack
        packed
      end
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,8 @@
1
# Shared test setup: puts the directory two levels above this file on the
# load path and loads test-unit plus the Fluentd test support code.
project_root = File.expand_path("../../", __FILE__)
$LOAD_PATH.unshift(project_root)

require "test-unit"
require "fluent/test"
require "fluent/test/driver/formatter"
require "fluent/test/helpers"

# Make the Fluentd test helpers callable both inside test bodies (include)
# and at class level (extend).
Test::Unit::TestCase.class_eval do
  include Fluent::Test::Helpers
  extend Fluent::Test::Helpers
end
@@ -0,0 +1,56 @@
1
+ require "helper"
2
+ require 'fluent/plugin/buf_arrow_memory'
3
+ require 'fluent/plugin/output'
4
+
5
# Namespace for the minimal output plugin that acts as the buffer's owner.
module FluentPluginArrowMemoryBufferTest
  class DummyOutputPlugin < Fluent::Plugin::Output
  end
end

# Tests for Fluent::Plugin::ArrowMemoryBuffer: persistence flag, schema
# construction from configuration, and chunk generation.
class ArrowMemoryBufferTest < Test::Unit::TestCase
  setup do
    Fluent::Test.setup
    @d = FluentPluginArrowMemoryBufferTest::DummyOutputPlugin.new
    @p = Fluent::Plugin::ArrowMemoryBuffer.new
    @p.owner = @d
  end

  test 'this is non persistent plugin' do
    refute @p.persistent?
  end

  test 'configure' do
    conf = %[
      schema [
        {"name": "foo1", "type": "uint64"},
        {"name": "foo2", "type": "string"},
        {"name": "foo3", "type": "timestamp", "unit": "milli"},
        {"name": "foo4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
        {"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}]}
      ]
    ]
    buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
    @p.configure(buffer_conf)

    schema = @p.arrow_schema
    assert_kind_of(Arrow::Schema, schema)
    assert_equal(5, schema.n_fields)

    # Expected data type class for each field, in declaration order.
    expected_types = [
      Arrow::UInt64DataType,
      Arrow::StringDataType,
      Arrow::TimestampDataType,
      Arrow::ListDataType,
      Arrow::StructDataType,
    ]
    expected_types.each_with_index do |type_class, index|
      assert_kind_of(type_class, schema.fields[index].data_type)
    end
    assert_kind_of(Arrow::UInt64DataType, schema.fields[3].data_type.value_field.data_type)
  end

  test 'generate_chunk' do
    conf = %[
      schema [
        {"name": "foo1", "type": "uint64"},
        {"name": "foo2", "type": "string"}
      ]
    ]
    buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
    @p.configure(buffer_conf)

    chunk = @p.generate_chunk(Object.new)
    assert_kind_of(Fluent::Plugin::Buffer::ArrowMemoryChunk, chunk)
  end
end
@@ -0,0 +1,61 @@
1
+ require "helper"
2
+ require "fluent/msgpack_factory"
3
+ require "fluent/plugin/buffer/arrow_memory_chunk"
4
+
5
# Tests that ArrowMemoryChunk re-encodes appended msgpack records as an Arrow
# record batch file, both through #read and through #write_to.
class ArrowMemoryChunkTest < Test::Unit::TestCase
  setup do
    @fields = [
      Arrow::Field.new("key1", :uint64),
      Arrow::Field.new("key2", :double),
      Arrow::Field.new("key3", Arrow::TimestampDataType.new(:second)),
    ]
    @schema = Arrow::Schema.new(@fields)
    @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
  end

  test "can #read" do
    append_sample_records
    Arrow::BufferInputStream.open(Arrow::Buffer.new(@c.read)) do |input|
      assert_sample_batches(Arrow::RecordBatchFileReader.new(input))
    end
  end

  test "can #write_to" do
    # Tempfile is used only here; load it explicitly instead of relying on
    # another library having required it.
    require "tempfile"

    append_sample_records
    Tempfile.create do |tf|
      @c.write_to(tf)
      tf.flush

      Arrow::MemoryMappedInputStream.open(tf.path) do |input|
        assert_sample_batches(Arrow::RecordBatchFileReader.new(input))
      end
    end
  end

  private

  # Appends two msgpack-encoded records matching the schema to the chunk.
  def append_sample_records
    d1 = {"key1" => 123, "key2" => 10.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
    d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
    @c.append([d1.to_msgpack, d2.to_msgpack])
  end

  # Checks every record batch from +reader+ against the sample records, and
  # checks that exactly one batch was read so the per-batch assertions cannot
  # be skipped silently.
  def assert_sample_batches(reader)
    batch_count = 0
    reader.each do |record_batch|
      batch_count += 1
      assert { record_batch.n_rows == 2 }

      assert { record_batch.find_column(@fields[0].name).class == Arrow::UInt64Array }
      assert { record_batch.find_column(@fields[0].name).values == [123, 124] }
    end
    assert_equal(1, batch_count)
  end
end
@@ -0,0 +1,14 @@
1
+ require "helper"
2
+ require "fluent/plugin/formatter_arrow.rb"
3
+
4
# Tests for Fluent::Plugin::ArrowFormatter, which emits records as MessagePack.
class ArrowFormatterTest < Test::Unit::TestCase
  setup do
    Fluent::Test.setup
  end

  test "formatter_type is binary" do
    d = create_driver("")
    assert_equal(:binary, d.instance.formatter_type)
  end

  test "format returns the record encoded as msgpack" do
    d = create_driver("")
    record = {"key1" => 123, "key2" => "value"}
    formatted = d.instance.format("test.tag", Fluent::EventTime.now, record)
    assert_equal(record.to_msgpack, formatted)
  end

  private

  # Builds a formatter test driver for ArrowFormatter configured with +conf+.
  def create_driver(conf)
    Fluent::Test::Driver::Formatter.new(Fluent::Plugin::ArrowFormatter).configure(conf)
  end
end
metadata ADDED
@@ -0,0 +1,151 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-arrow
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - joker1007
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-11-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.14'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.14'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '12.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '12.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: test-unit
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fluentd
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.14.10
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '2'
65
+ type: :runtime
66
+ prerelease: false
67
+ version_requirements: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: 0.14.10
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '2'
75
+ - !ruby/object:Gem::Dependency
76
+ name: red-arrow
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0.10'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0.10'
89
+ - !ruby/object:Gem::Dependency
90
+ name: red-parquet
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :runtime
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ description: Apache Arrow formatter plugin for fluentd.
104
+ email:
105
+ - kakyoin.hierophant@gmail.com
106
+ executables: []
107
+ extensions: []
108
+ extra_rdoc_files: []
109
+ files:
110
+ - ".gitignore"
111
+ - Gemfile
112
+ - LICENSE
113
+ - README.md
114
+ - Rakefile
115
+ - fluent-plugin-arrow.gemspec
116
+ - lib/fluent/plugin/buf_arrow_memory.rb
117
+ - lib/fluent/plugin/buffer/arrow_memory_chunk.rb
118
+ - lib/fluent/plugin/formatter_arrow.rb
119
+ - test/helper.rb
120
+ - test/plugin/test_buf_arrow_memory.rb
121
+ - test/plugin/test_buffer_arrow_memory_chunk.rb
122
+ - test/plugin/test_formatter_arrow.rb
123
+ homepage: https://github.com/joker1007/fluent-plugin-arrow
124
+ licenses:
125
+ - Apache-2.0
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 2.7.8
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: Apache Arrow formatter plugin for fluentd.
147
+ test_files:
148
+ - test/helper.rb
149
+ - test/plugin/test_buf_arrow_memory.rb
150
+ - test/plugin/test_buffer_arrow_memory_chunk.rb
151
+ - test/plugin/test_formatter_arrow.rb