fluent-plugin-arrow 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46290f5f4e9c75defd5494117f6deb504dad5fb70c51ceaccb5aabe9ff06934
4
- data.tar.gz: 6bc16d3413997b32efe5233a3aea4e3f4a3020eaf70c10675d8d1eef5899f0a2
3
+ metadata.gz: a1ed6b1b40e55501097a3a0455b2f2be93b3cf55d7e8cb831d4dc742f4007b97
4
+ data.tar.gz: fdcaed4462a919076c29f5947c3c81744b887e0099d2e5ad7a317e6845391da1
5
5
  SHA512:
6
- metadata.gz: 73e46c999a9a62710c1ead926565d18391cd377e61e6eb01bb76d6ca102cf8423ffd01c7934dfd6fb0c32282bf04867810854dd14b76ec79416597e6237aae76
7
- data.tar.gz: 6c14dc63fa79ece11140fd1b2ee916bcf2917260b0ae9a5741024ea5923d18e3a66add447a7f8c3cf1d5d24fccb4c8cb651d40e609f353813f7673d39669cf84
6
+ metadata.gz: 5f219215b1f8b8a57b2f9d79a6d1fdec39e10652c6d674a8d1390f7f3ce75d8752c15e89b268e36fb781bed44cfed2e128fd840b0978eb508c75dec1fc2b82a1
7
+ data.tar.gz: 2d473fbead6b6c12e361b045e7de325652b36c22d94685be289bc8a076c03f4dcb8b2c5270d922f76c0f462475d6846c96176abf4085abf24affaad0bc4b04fd
data/README.md CHANGED
@@ -1,8 +1,13 @@
1
1
  # fluent-plugin-arrow
2
2
 
3
- [Fluentd](https://fluentd.org/) formatter plugin to do something.
3
+ [Fluentd](https://fluentd.org/) buffer plugin to output Apache Arrow and Parquet format.
4
4
 
5
- TODO: write description for you plugin.
5
+ ## Prerequisite
6
+
7
+ - [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp) (built with -DARROW_PARQUET=ON)
8
+ - [Apache Arrow c_glib](https://github.com/apache/arrow/tree/master/c_glib)
9
+ - [red-arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
10
+ - [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet)
6
11
 
7
12
  ## Installation
8
13
 
@@ -31,7 +36,31 @@ $ bundle
31
36
  You can generate configuration template:
32
37
 
33
38
  ```
34
- $ fluent-plugin-config-format formatter arrow
39
+ <match arrow>
40
+ @type file
41
+
42
+ path arrow_test
43
+
44
+ <buffer>
45
+ @type arrow_memory
46
+ arrow_format arrow # or parquet
47
+
48
+ schema [
49
+ {"name": "key1", "type": "string"},
50
+ {"name": "key2", "type": "uint64"},
51
+ {"name": "key3", "type": "timestamp", "unit": "milli"},
52
+ {"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
53
+ {"name": "key5", "type": "struct", "fields": [
54
+ {"name": "bar1", "type": "uint64"},
55
+ {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
56
+ ]}
57
+ ]
58
+ </buffer>
59
+
60
+ <format>
61
+ @type arrow
62
+ </format>
63
+ </match>
35
64
  ```
36
65
 
37
66
  You can copy and paste generated documents here.
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-arrow"
6
- spec.version = "0.0.1"
6
+ spec.version = "0.0.2"
7
7
  spec.authors = ["joker1007"]
8
8
  spec.email = ["kakyoin.hierophant@gmail.com"]
9
9
 
@@ -0,0 +1,259 @@
1
+ require "arrow"
2
+
3
+ module Fluent
4
+ module Plugin
5
+ module Arrow
6
# Pairs one schema field definition with the Arrow field and the Arrow array
# builder created for it.
#
# A field definition is a Hash with string keys: "name", "type", and, depending
# on the type, "value_type" (list element definition), "fields" (struct member
# definitions) or "unit" (timestamp resolution).
class FieldWrapper
  class << self
    # Instantiates the wrapper subclass that matches field["type"].
    #
    # @param field [Hash] field definition with string keys
    # @return [FieldWrapper] an instance of the matching subclass
    # @raise [RuntimeError] when field["type"] is not one of the supported names
    def build(field)
      case field["type"]
      when "string"
        StringFieldWrapper.new(field)
      when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
        IntegerFieldWrapper.new(field)
      when "float", "double"
        FloatFieldWrapper.new(field)
      when "boolean"
        BooleanFieldWrapper.new(field)
      when "date32"
        Date32FieldWrapper.new(field)
      when "date64"
        Date64FieldWrapper.new(field)
      when "timestamp"
        TimestampFieldWrapper.new(field)
      when "list"
        ListFieldWrapper.new(field)
      when "struct"
        StructFieldWrapper.new(field)
      else
        # Name the offending type and field so a schema typo can be located.
        raise "Unsupported data type: #{field["type"].inspect} (field: #{field["name"].inspect})"
      end
    end
  end

  attr_reader :field, :name, :type, :children, :arrow_field, :array_builder

  # Builds child wrappers first (the list element from "value_type", the struct
  # members from "fields"), then this wrapper's own Arrow field and builder,
  # because the overrides in the list/struct subclasses read `children`.
  #
  # @param field [Hash] field definition with string keys
  def initialize(field)
    @field = field
    @name = field["name"]
    @type = field["type"]
    @children = []

    @children << self.class.build(field["value_type"]) if field["value_type"]
    field["fields"]&.each { |child| @children << self.class.build(child) }

    create_arrow_field
    create_array_builder
  end

  # Appends one value to the builder; nil becomes a null entry, anything else
  # goes through #cast_value first.
  #
  # @param value [Object, nil]
  def append(value)
    if value.nil?
      @array_builder.append_null
    else
      @array_builder.append(cast_value(value))
    end
  end

  # @return [Object] whatever the underlying builder's #finish returns
  def finish
    @array_builder.finish
  end

  # Creates and memoizes the Arrow::Field for this definition.
  def create_arrow_field
    @arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
  end

  # Resolves "<Type>DataType" under ::Arrow from the type name (e.g. "uint64"
  # -> ::Arrow::UInt64DataType) and instantiates it without arguments.
  def create_arrow_data_type
    ::Arrow.const_get("#{arrow_class_prefix(type)}DataType").new
  end

  # Sets the array builder. A parent list/struct wrapper passes the builder it
  # owns for this child as +from_parent+; otherwise "<Type>ArrayBuilder" is
  # resolved under ::Arrow from the data type's string form.
  #
  # @param from_parent [Object, nil] builder supplied by the parent wrapper
  def create_array_builder(from_parent = nil)
    @array_builder =
      from_parent || ::Arrow.const_get("#{arrow_class_prefix(arrow_field.data_type.to_s)}ArrayBuilder").new
  end

  # Converts a record value into what the builder accepts. Subclasses that rely
  # on the base #append must override this.
  #
  # @raise [NotImplementedError] always, in the base class
  def cast_value(value)
    raise NotImplementedError
  end

  private

  # Turns a lowercase type name into the prefix used by ::Arrow class names:
  # capitalizes it and restores the inner capital of "UInt".
  def arrow_class_prefix(type_name)
    type_name.to_s.capitalize.sub(/\AUint/, "UInt")
  end
end
93
+
94
# Wrapper for "string" fields.
class StringFieldWrapper < FieldWrapper
  # @param value [Object] non-nil record value
  # @return [String] the value's #to_s representation
  def cast_value(value)
    text = value.to_s
    text
  end
end
99
+
100
# Wrapper for the signed and unsigned integer field types.
class IntegerFieldWrapper < FieldWrapper
  # @param value [Object] non-nil record value
  # @return [Integer] the value converted with #to_i
  def cast_value(value)
    integer = value.to_i
    integer
  end
end
105
+
106
# Wrapper for "float" and "double" fields.
class FloatFieldWrapper < FieldWrapper
  # @param value [Object] non-nil record value
  # @return [Float] the value converted with #to_f
  def cast_value(value)
    number = value.to_f
    number
  end
end
111
+
112
# Wrapper for "boolean" fields.
class BooleanFieldWrapper < FieldWrapper
  # Reduces the value to a strict boolean by Ruby truthiness.
  #
  # @param value [Object] non-nil record value
  # @return [Boolean]
  def cast_value(value)
    value ? true : false
  end
end
117
+
118
+ require "date"
119
# Wrapper for "date32" fields; values are stored as a day count relative to
# 1970-01-01.
class Date32FieldWrapper < FieldWrapper
  UNIX_EPOCH = Date.new(1970, 1, 1)

  # @param value [#to_date, String] an object convertible with #to_date, or
  #   text handed to Date.parse
  # @return [Integer] whole days between UNIX_EPOCH and the date
  def cast_value(value)
    date = value.respond_to?(:to_date) ? value.to_date : Date.parse(value)
    (date - UNIX_EPOCH).to_i
  end

  # Uses the builder supplied by a parent wrapper, or a fresh
  # ::Arrow::Date32ArrayBuilder when there is none.
  def create_array_builder(from_parent = nil)
    @array_builder = from_parent || ::Arrow::Date32ArrayBuilder.new
  end
end
140
+
141
# Wrapper for "date64" fields; values are stored as milliseconds relative to
# the UNIX epoch.
class Date64FieldWrapper < FieldWrapper
  UNIX_EPOCH = Date.new(1970, 1, 1)
  MILLISECONDS_PER_DAY = 86_400_000

  # @param value [Date, #to_time, String] a calendar date, an object
  #   convertible with #to_time, or text handed to Time.parse
  # @return [Integer] milliseconds since the UNIX epoch
  def cast_value(value)
    # A plain Date carries no time zone. Date#to_time would yield *local*
    # midnight and shift the stored instant by the UTC offset (possibly onto
    # the previous UTC day), so count whole days from the epoch instead.
    if value.is_a?(Date) && !value.is_a?(DateTime)
      return (value - UNIX_EPOCH).to_i * MILLISECONDS_PER_DAY
    end

    time = value.respond_to?(:to_time) ? value.to_time : Time.parse(value)
    time.to_i * 1_000 + time.usec / 1_000
  end

  # Uses the builder supplied by a parent wrapper, or a fresh
  # ::Arrow::Date64ArrayBuilder when there is none.
  #
  # @param from_parent [Object, nil] builder supplied by the parent wrapper
  def create_array_builder(from_parent = nil)
    @array_builder = from_parent || ::Arrow::Date64ArrayBuilder.new
  end
end
162
+
163
+ require "time"
164
# Wrapper for "timestamp" fields. The resolution comes from field["unit"]
# ("second", "milli", "micro"; any other value is treated as nanoseconds when
# casting).
class TimestampFieldWrapper < FieldWrapper
  # Converts a record value into an integer count of the configured unit since
  # the UNIX epoch. Numeric input is returned unchanged, i.e. it is taken to be
  # expressed in the configured unit already.
  #
  # @param value [Fluent::EventTime, #to_time, String, Numeric]
  # @return [Numeric]
  def cast_value(value)
    time =
      if value.is_a?(Fluent::EventTime)
        # Build from sec/nsec (microseconds given as a Rational) so that
        # sub-microsecond precision is kept for the "nano" unit.
        Time.at(value.sec, Rational(value.nsec, 1_000))
      elsif value.respond_to?(:to_time)
        value.to_time
      elsif value.is_a?(String)
        Time.parse(value)
      else
        value
      end

    return time if time.is_a?(Numeric)

    case field["unit"]
    when "second"
      time.to_i
    when "milli"
      time.to_i * 1_000 + time.usec / 1_000
    when "micro"
      time.to_i * 1_000_000 + time.usec
    else
      time.to_i * 1_000_000_000 + time.nsec
    end
  end

  # @return [::Arrow::TimestampDataType] data type for the configured unit
  def create_arrow_data_type
    ::Arrow::TimestampDataType.new(field["unit"].to_sym)
  end

  # Uses the builder supplied by a parent wrapper, or a fresh
  # ::Arrow::TimestampArrayBuilder for this field's data type.
  #
  # @param from_parent [Object, nil] builder supplied by the parent wrapper
  def create_array_builder(from_parent = nil)
    @array_builder = from_parent || ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
  end
end
203
+
204
# Wrapper for "list" fields; the single child wrapper describes the element.
class ListFieldWrapper < FieldWrapper
  # Appends one list: a null entry for nil, otherwise a new list slot on the
  # list builder followed by each element on the child wrapper.
  #
  # @param value [#each, nil]
  def append(value)
    return @array_builder.append_null if value.nil?

    @array_builder.append
    value.each { |element| @children[0].append(element) }
  end

  # @return [::Arrow::ListDataType] list type over the child's Arrow field
  def create_arrow_data_type
    element_field = children[0].arrow_field
    ::Arrow::ListDataType.new(element_field)
  end

  # Sets this wrapper's builder (from the parent, or a new
  # ::Arrow::ListArrayBuilder) and re-points the child at its value builder.
  def create_array_builder(from_parent = nil)
    @array_builder = from_parent || ::Arrow::ListArrayBuilder.new(arrow_field.data_type)

    @children.each do |child|
      child.create_array_builder(@array_builder.value_builder)
    end
  end
end
230
+
231
# Wrapper for "struct" fields; one child wrapper per declared member, in
# schema order.
class StructFieldWrapper < FieldWrapper
  # Appends one struct row.
  #
  # Iterates the declared members rather than the record's keys so that every
  # child builder receives exactly one entry per row: a member missing from
  # the record becomes a null entry, and a key that is not declared in the
  # schema is ignored instead of failing on a nil lookup.
  #
  # @param value [Hash, #to_h, nil] member name => member value
  def append(value)
    if value.nil?
      @array_builder.append_null
    else
      @array_builder.append
      row = value.to_h
      @children.each { |child| child.append(row[child.name]) }
    end
  end

  # @return [::Arrow::StructDataType] struct type over the children's fields
  def create_arrow_data_type
    ::Arrow::StructDataType.new(children.map(&:arrow_field))
  end

  # Sets this wrapper's builder (from the parent, or a new
  # ::Arrow::StructArrayBuilder) and re-points each child at the struct
  # builder's field builder with the same index.
  #
  # @param from_parent [Object, nil] builder supplied by the parent wrapper
  def create_array_builder(from_parent = nil)
    @array_builder = from_parent || ::Arrow::StructArrayBuilder.new(arrow_field.data_type)

    @children.each_with_index do |child, index|
      child.create_array_builder(@array_builder.get_field_builder(index))
    end
  end
end
257
+ end
258
+ end
259
+ end
@@ -16,6 +16,7 @@
16
16
  require "arrow"
17
17
  require 'fluent/plugin/buffer'
18
18
  require 'fluent/plugin/buffer/arrow_memory_chunk'
19
+ require 'fluent/plugin/arrow/field_wrapper'
19
20
 
20
21
  module Fluent
21
22
  module Plugin
@@ -32,11 +33,11 @@ module Fluent
32
33
  super
33
34
 
34
35
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
35
- arrow_fields = @schema.map do |field|
36
- create_arrow_field(field)
36
+ @field_wrappers = @schema.each_with_object({}) do |field, h|
37
+ h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
37
38
  end
38
39
 
39
- @arrow_schema = Arrow::Schema.new(arrow_fields)
40
+ @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
40
41
  end
41
42
 
42
43
  def resume
@@ -44,29 +45,7 @@ module Fluent
44
45
  end
45
46
 
46
47
  def generate_chunk(metadata)
47
- Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
48
- end
49
-
50
- private
51
-
52
- def create_arrow_field(field)
53
- Arrow::Field.new(field["name"], create_arrow_data_type(field))
54
- end
55
-
56
- def create_arrow_data_type(field)
57
- case field["type"]
58
- when "struct"
59
- Arrow::StructDataType.new(field["fields"].map { |f| create_arrow_field(f) })
60
- when "list"
61
- Arrow::ListDataType.new(create_arrow_field(field["value_type"]))
62
- when "timestamp"
63
- Arrow::TimestampDataType.new(field["unit"].to_sym)
64
- else
65
- data_type_name = field["type"].to_s.capitalize.gsub(/\AUint/, "UInt")
66
- data_type_class_name = "#{data_type_name}DataType"
67
- data_type_class = Arrow.const_get(data_type_class_name)
68
- data_type_class.new
69
- end
48
+ Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
70
49
  end
71
50
  end
72
51
  end
@@ -15,22 +15,21 @@
15
15
 
16
16
  require 'arrow'
17
17
  require 'parquet'
18
+ require 'fluent/msgpack_factory'
18
19
  require 'fluent/plugin/buffer/chunk'
19
20
  require 'fluent/plugin/buffer/memory_chunk'
21
+ require 'fluent/plugin/arrow/field_wrapper'
20
22
 
21
23
  module Fluent
22
24
  module Plugin
23
25
  class Buffer
24
26
  class ArrowMemoryChunk < MemoryChunk
25
- def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
27
+ def initialize(metadata, schema, field_wrappers, chunk_size: 1024, format: :arrow)
26
28
  super(metadata, compress: :text)
27
29
  @schema = schema
30
+ @field_wrappers = field_wrappers
28
31
  @chunk_size = chunk_size
29
32
  @format = format
30
- @array_builders = {}
31
- @schema.fields.each do |f|
32
- @array_builders[f.name] = field_to_array_builder(f)
33
- end
34
33
  @unpacker = Fluent::MessagePackFactory.engine_factory.unpacker
35
34
  end
36
35
 
@@ -49,48 +48,28 @@ module Fluent
49
48
 
50
49
  private
51
50
 
52
- def field_to_array_builder(f)
53
- data_type_str = f.data_type.to_s
54
- if data_type_str =~ /timestamp/
55
- return Arrow::TimestampArrayBuilder.new(f.data_type)
56
- end
57
-
58
- data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
59
- array_builder_class_name = "#{data_type_name}ArrayBuilder"
60
- array_builder_class = Arrow.const_get(array_builder_class_name)
61
- if array_builder_class.method(:new).arity > 0
62
- array_builder_class.new(f.data_type)
63
- else
64
- array_builder_class.new
65
- end
66
- end
67
-
68
51
  def build_arrow_buffer_string
69
52
  count = 0
70
53
  @unpacker.feed_each(@chunk) do |record|
71
54
  count += 1
72
55
  record.each do |k, v|
73
- if v.nil?
74
- @array_builders[k].append_null
75
- else
76
- @array_builders[k].append(v)
77
- end
56
+ @field_wrappers[k].append(v)
78
57
  end
79
58
  end
80
- arrow_buf = Arrow::ResizableBuffer.new(@chunk_bytes * 1.2)
59
+ arrow_buf = ::Arrow::ResizableBuffer.new(@chunk_bytes * 1.2)
81
60
 
82
- Arrow::BufferOutputStream.open(arrow_buf) do |output|
61
+ ::Arrow::BufferOutputStream.open(arrow_buf) do |output|
83
62
  if @format == :parquet
84
63
  Parquet::ArrowFileWriter.open(@schema, output) do |writer|
85
64
  columns = @schema.fields.map do |f|
86
- Arrow::Column.new(f, @array_builders[f.name].finish)
65
+ ::Arrow::Column.new(f, @field_wrappers[f.name].finish)
87
66
  end
88
- table = Arrow::Table.new(@schema, columns)
67
+ table = ::Arrow::Table.new(@schema, columns)
89
68
  writer.write_table(table, @chunk_size)
90
69
  end
91
70
  else
92
- Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
93
- record_batch = Arrow::RecordBatch.new(@schema, count, @array_builders.values.map(&:finish))
71
+ ::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
72
+ record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
94
73
  writer.write_record_batch(record_batch)
95
74
  end
96
75
  end
@@ -5,12 +5,17 @@ require "fluent/plugin/buffer/arrow_memory_chunk"
5
5
  class ArrowMemoryChunkTest < Test::Unit::TestCase
6
6
  setup do
7
7
  @fields = [
8
- Arrow::Field.new("key1", :uint64),
9
- Arrow::Field.new("key2", :double),
10
- Arrow::Field.new("key3", Arrow::TimestampDataType.new(:second)),
8
+ ::Arrow::Field.new("key1", :uint64),
9
+ ::Arrow::Field.new("key2", :double),
10
+ ::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
11
11
  ]
12
+ field_wrappers = {
13
+ "key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
14
+ "key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
15
+ "key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
16
+ }
12
17
  @schema = Arrow::Schema.new(@fields)
13
- @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
18
+ @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
14
19
  end
15
20
 
16
21
  test "can #read" do
@@ -18,44 +23,41 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
18
23
  d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
19
24
  data = [d1.to_msgpack, d2.to_msgpack]
20
25
  @c.append(data)
21
- Arrow::BufferInputStream.open(Arrow::Buffer.new(@c.read)) do |input|
22
- reader = Arrow::RecordBatchFileReader.new(input)
26
+ ::Arrow::BufferInputStream.open(::Arrow::Buffer.new(@c.read)) do |input|
27
+ reader = ::Arrow::RecordBatchFileReader.new(input)
23
28
 
24
29
  reader.each do |record_batch|
25
30
  assert { record_batch.n_rows == 2 }
26
31
 
27
- assert { record_batch.find_column(@fields[0].name).class == Arrow::UInt64Array }
32
+ assert { record_batch.find_column(@fields[0].name).class == ::Arrow::UInt64Array }
28
33
  assert { record_batch.find_column(@fields[0].name).values == [123, 124] }
29
34
  end
30
35
  end
31
36
  end
32
37
 
33
38
  test "can #write_to" do
34
- d1 = {"key1" => 123, "key2" => 10.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
35
- d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
39
+ time = Time.now
40
+ d1 = {"key1" => 123, "key2" => 10.1234, "key3" => Fluent::EventTime.from_time(time)}
41
+ d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(time)}
36
42
  data = [d1.to_msgpack, d2.to_msgpack]
37
43
  @c.append(data)
38
44
  Tempfile.create do |tf|
39
45
  @c.write_to(tf)
40
46
  tf.flush
41
47
 
42
- Arrow::MemoryMappedInputStream.open(tf.path) do |input|
43
- reader = Arrow::RecordBatchFileReader.new(input)
48
+ ::Arrow::MemoryMappedInputStream.open(tf.path) do |input|
49
+ reader = ::Arrow::RecordBatchFileReader.new(input)
44
50
  reader.each_with_index do |record_batch, i|
45
51
  reader.each do |record_batch|
46
52
  assert { record_batch.n_rows == 2 }
47
53
 
48
- assert { record_batch.find_column(@fields[0].name).class == Arrow::UInt64Array }
54
+ assert { record_batch.find_column(@fields[0].name).class == ::Arrow::UInt64Array }
49
55
  assert { record_batch.find_column(@fields[0].name).values == [123, 124] }
56
+ assert { record_batch.find_column(@fields[1].name).values == [10.1234, 11.1234] }
57
+ assert { record_batch.find_column(@fields[2].name)[0].to_i == time.to_i }
50
58
  end
51
59
  end
52
60
  end
53
61
  end
54
62
  end
55
-
56
- private
57
-
58
- def create_driver(conf)
59
- Fluent::Test::Driver::Formatter.new(Fluent::Plugin::ArrowFormatter).configure(conf)
60
- end
61
63
  end
@@ -0,0 +1,145 @@
1
+ require "helper"
2
+ require "fluent/plugin/arrow/field_wrapper"
3
+
4
# Exercises FieldWrapper.build for scalar, list, struct and nested schemas,
# and #append/#finish round trips for timestamp, date and nested values.
class ArrowFieldWrapperTest < Test::Unit::TestCase
  # Struct holding a string member and a list-of-uint64 member.
  NESTED_FIELD = {"name" => "key1", "type" => "struct", "fields" => [
    {"name" => "foo1", "type" => "string"},
    {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
  ]}

  test ".build (string)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "string"})
    assert_wrapper(wrapper, "key1", "string")
  end

  test ".build (timestamp)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
    assert_wrapper(wrapper, "key1", "timestamp")
  end

  test ".build (list)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})
    assert_wrapper(wrapper, "key1", "list", Arrow::ListDataType, Arrow::ListArrayBuilder)

    assert_wrapper(wrapper.children[0], "value", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)
  end

  test ".build (struct)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "struct", "fields" => [
      {"name" => "foo1", "type" => "string"},
      {"name" => "foo2", "type" => "uint64"},
      {"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
    ]})
    assert_wrapper(wrapper, "key1", "struct", Arrow::StructDataType, Arrow::StructArrayBuilder)

    assert_wrapper(wrapper.children[0], "foo1", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)
    assert_wrapper(wrapper.children[1], "foo2", "uint64", Arrow::UInt64DataType, Arrow::UInt64ArrayBuilder)
    assert_wrapper(wrapper.children[2], "foo3", "timestamp", Arrow::TimestampDataType, Arrow::TimestampArrayBuilder)
  end

  test ".build (nested)" do
    wrapper = build_wrapper(NESTED_FIELD)
    assert_wrapper(wrapper, "key1", "struct", Arrow::StructDataType, Arrow::StructArrayBuilder)

    assert_wrapper(wrapper.children[0], "foo1", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)

    list_wrapper = wrapper.children[1]
    assert_wrapper(list_wrapper, "foo2", "list", Arrow::ListDataType, Arrow::ListArrayBuilder)
    assert_wrapper(list_wrapper.children[0], "value", "uint64", Arrow::UInt64DataType, Arrow::UInt64ArrayBuilder)
  end

  test "#append (timestamp)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
    now = Time.now
    wrapper.append(now)

    stored = wrapper.finish[0]
    assert_kind_of Time, stored
    assert_equal now.to_i, stored.to_i
  end

  test "#append (date32)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "date32"})
    today = Date.today
    wrapper.append(today)

    stored = wrapper.finish[0]
    assert_kind_of Date, stored
    assert_equal today, stored
  end

  test "#append (date64)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "date64"})
    today = Date.today
    wrapper.append(today)

    stored = wrapper.finish[0]
    assert_kind_of DateTime, stored
    assert_equal today, stored.to_date
  end

  test "#append (nested)" do
    wrapper = build_wrapper(NESTED_FIELD)

    wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
    wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})

    struct_array = wrapper.finish
    strings = struct_array.fields[0]
    assert_kind_of Arrow::StringArray, strings
    assert_equal "rec1", strings[0]
    assert_equal "rec2", strings[1]

    first_list = struct_array.fields[1].get_value(0)
    assert_kind_of Arrow::UInt64Array, first_list
    assert_equal 1, first_list[0]
    assert_equal 2, first_list[1]
    assert_equal 3, first_list[2]

    second_list = struct_array.fields[1].get_value(1)
    assert_kind_of Arrow::UInt64Array, second_list
    assert_equal 4, second_list[0]
    assert_equal 5, second_list[1]
  end

  private

  # Shorthand for the factory under test.
  def build_wrapper(field)
    Fluent::Plugin::Arrow::FieldWrapper.build(field)
  end

  # Checks name, type and Arrow field of a wrapper and, when the classes are
  # given, the Arrow data type and array builder it created.
  def assert_wrapper(wrapper, name, type, data_type_class = nil, builder_class = nil)
    assert_equal name, wrapper.name
    assert_equal type, wrapper.type
    assert_kind_of Arrow::Field, wrapper.arrow_field
    assert_kind_of data_type_class, wrapper.arrow_field.data_type if data_type_class
    assert_kind_of builder_class, wrapper.array_builder if builder_class
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-28 00:00:00.000000000 Z
11
+ date: 2018-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -113,13 +113,14 @@ files:
113
113
  - README.md
114
114
  - Rakefile
115
115
  - fluent-plugin-arrow.gemspec
116
+ - lib/fluent/plugin/arrow/field_wrapper.rb
116
117
  - lib/fluent/plugin/buf_arrow_memory.rb
117
118
  - lib/fluent/plugin/buffer/arrow_memory_chunk.rb
118
119
  - lib/fluent/plugin/formatter_arrow.rb
119
120
  - test/helper.rb
120
121
  - test/plugin/test_buf_arrow_memory.rb
121
122
  - test/plugin/test_buffer_arrow_memory_chunk.rb
122
- - test/plugin/test_formatter_arrow.rb
123
+ - test/plugin/test_field_wrapper.rb
123
124
  homepage: https://github.com/joker1007/fluent-plugin-arrow
124
125
  licenses:
125
126
  - Apache-2.0
@@ -148,4 +149,4 @@ test_files:
148
149
  - test/helper.rb
149
150
  - test/plugin/test_buf_arrow_memory.rb
150
151
  - test/plugin/test_buffer_arrow_memory_chunk.rb
151
- - test/plugin/test_formatter_arrow.rb
152
+ - test/plugin/test_field_wrapper.rb
@@ -1,14 +0,0 @@
1
- require "helper"
2
- require "fluent/plugin/formatter_arrow.rb"
3
-
4
- class ArrowFormatterTest < Test::Unit::TestCase
5
- setup do
6
- Fluent::Test.setup
7
- end
8
-
9
- private
10
-
11
- def create_driver(conf)
12
- Fluent::Test::Driver::Formatter.new(Fluent::Plugin::ArrowFormatter).configure(conf)
13
- end
14
- end