fluent-plugin-arrow 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46290f5f4e9c75defd5494117f6deb504dad5fb70c51ceaccb5aabe9ff06934
4
- data.tar.gz: 6bc16d3413997b32efe5233a3aea4e3f4a3020eaf70c10675d8d1eef5899f0a2
3
+ metadata.gz: a1ed6b1b40e55501097a3a0455b2f2be93b3cf55d7e8cb831d4dc742f4007b97
4
+ data.tar.gz: fdcaed4462a919076c29f5947c3c81744b887e0099d2e5ad7a317e6845391da1
5
5
  SHA512:
6
- metadata.gz: 73e46c999a9a62710c1ead926565d18391cd377e61e6eb01bb76d6ca102cf8423ffd01c7934dfd6fb0c32282bf04867810854dd14b76ec79416597e6237aae76
7
- data.tar.gz: 6c14dc63fa79ece11140fd1b2ee916bcf2917260b0ae9a5741024ea5923d18e3a66add447a7f8c3cf1d5d24fccb4c8cb651d40e609f353813f7673d39669cf84
6
+ metadata.gz: 5f219215b1f8b8a57b2f9d79a6d1fdec39e10652c6d674a8d1390f7f3ce75d8752c15e89b268e36fb781bed44cfed2e128fd840b0978eb508c75dec1fc2b82a1
7
+ data.tar.gz: 2d473fbead6b6c12e361b045e7de325652b36c22d94685be289bc8a076c03f4dcb8b2c5270d922f76c0f462475d6846c96176abf4085abf24affaad0bc4b04fd
data/README.md CHANGED
@@ -1,8 +1,13 @@
1
1
  # fluent-plugin-arrow
2
2
 
3
- [Fluentd](https://fluentd.org/) formatter plugin to do something.
3
+ [Fluentd](https://fluentd.org/) buffer plugin that outputs events in Apache Arrow and Parquet formats.
4
4
 
5
- TODO: write description for you plugin.
5
+ ## Prerequisite
6
+
7
+ - [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp) (with -DARROW_PARQUET=ON)
8
+ - [Apache Arrow c_glib](https://github.com/apache/arrow/tree/master/c_glib)
9
+ - [red-arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
10
+ - [red-parquet](https://github.com/apache/arrow/tree/master/ruby/red-parquet)
6
11
 
7
12
  ## Installation
8
13
 
@@ -31,7 +36,31 @@ $ bundle
31
36
  You can generate configuration template:
32
37
 
33
38
  ```
34
- $ fluent-plugin-config-format formatter arrow
39
+ <match arrow>
40
+ @type file
41
+
42
+ path arrow_test
43
+
44
+ <buffer>
45
+ @type arrow_memory
46
+ arrow_format arrow # or parquet
47
+
48
+ schema [
49
+ {"name": "key1", "type": "string"},
50
+ {"name": "key2", "type": "uint64"},
51
+ {"name": "key3", "type": "timestamp", "unit": "milli"},
52
+ {"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
53
+ {"name": "key5", "type": "struct", "fields": [
54
+ {"name": "bar1", "type": "uint64"},
55
+ {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
56
+ ]}
57
+ ]
58
+ </buffer>
59
+
60
+ <format>
61
+ @type arrow
62
+ </format>
63
+ </match>
35
64
  ```
36
65
 
37
66
  You can copy and paste generated documents here.
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-arrow"
6
- spec.version = "0.0.1"
6
+ spec.version = "0.0.2"
7
7
  spec.authors = ["joker1007"]
8
8
  spec.email = ["kakyoin.hierophant@gmail.com"]
9
9
 
@@ -0,0 +1,259 @@
1
+ require "arrow"
2
+
3
module Fluent
  module Plugin
    module Arrow
      # Couples one schema field definition (a Hash carrying at least "name"
      # and "type") with the Arrow field created from it and the array builder
      # that collects the field's values.
      #
      # List and struct definitions are expanded recursively: the entries found
      # under "value_type" / "fields" become child wrappers in #children.
      class FieldWrapper
        class << self
          # Instantiates the wrapper subclass that handles field["type"].
          #
          # @param field [Hash] schema entry, e.g. {"name" => "key1", "type" => "string"}
          # @return [FieldWrapper] an instance of the matching subclass
          # @raise [RuntimeError] when the type name is not handled below
          def build(field)
            case field["type"]
            when "string"
              StringFieldWrapper.new(field)
            when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
              IntegerFieldWrapper.new(field)
            when "float", "double"
              FloatFieldWrapper.new(field)
            when "boolean"
              BooleanFieldWrapper.new(field)
            when "date32"
              Date32FieldWrapper.new(field)
            when "date64"
              Date64FieldWrapper.new(field)
            when "timestamp"
              TimestampFieldWrapper.new(field)
            when "list"
              ListFieldWrapper.new(field)
            when "struct"
              StructFieldWrapper.new(field)
            else
              # Name the offending type so a bad schema entry can be located.
              raise "Unsupported data type: #{field["type"].inspect}"
            end
          end
        end

        attr_reader :field, :name, :type, :children, :arrow_field, :array_builder

        # @param field [Hash] schema entry; "value_type" (single Hash) and
        #   "fields" (list of Hashes) are turned into child wrappers when present
        def initialize(field)
          @field = field
          @name = field["name"]
          @type = field["type"]
          @children = []

          field["value_type"]&.tap do |f|
            @children << self.class.build(f)
          end

          field["fields"]&.each do |f|
            @children << self.class.build(f)
          end

          # Children must exist first: list/struct data types are composed
          # from the children's arrow fields.
          create_arrow_field
          create_array_builder
        end

        # Appends one value to the builder; nil becomes a null entry, anything
        # else is converted with #cast_value first.
        def append(value)
          if value.nil?
            @array_builder.append_null
          else
            @array_builder.append(cast_value(value))
          end
        end

        # Delegates to the builder's #finish and returns its result.
        def finish
          @array_builder.finish
        end

        # Creates and stores the ::Arrow::Field for this wrapper.
        def create_arrow_field
          @arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
        end

        # Looks up ::Arrow::<Type>DataType from the schema type name and
        # instantiates it without arguments.
        def create_arrow_data_type
          ::Arrow.const_get("#{arrow_class_prefix}DataType").new
        end

        # Installs the array builder. A parent list/struct wrapper passes the
        # builder it owns for this child as +from_parent+; otherwise a fresh
        # ::Arrow::<Type>ArrayBuilder is created.
        #
        # The class name is derived from the schema type (the same source
        # #create_arrow_data_type uses) rather than from data_type.to_s.
        # NOTE(review): Arrow appears to render the boolean type as "bool",
        # which would resolve to a non-existent BoolArrayBuilder - confirm.
        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow.const_get("#{arrow_class_prefix}ArrayBuilder").new
        end

        # Converts a raw record value into what the builder accepts.
        # Subclasses must override this.
        def cast_value(value)
          raise NotImplementedError
        end

        private

        # "uint64" -> "UInt64", "string" -> "String", ...
        def arrow_class_prefix
          type.to_s.capitalize.gsub(/\AUint/, "UInt")
        end
      end

      # Stores values via #to_s.
      class StringFieldWrapper < FieldWrapper
        def cast_value(value)
          value.to_s
        end
      end

      # Stores values via #to_i.
      class IntegerFieldWrapper < FieldWrapper
        def cast_value(value)
          value.to_i
        end
      end

      # Stores values via #to_f.
      class FloatFieldWrapper < FieldWrapper
        def cast_value(value)
          value.to_f
        end
      end

      # Stores the Ruby truthiness of the value (only nil/false are false, and
      # nil never reaches here because FieldWrapper#append handles it).
      class BooleanFieldWrapper < FieldWrapper
        def cast_value(value)
          !!value
        end
      end

      require "date"
      # Stores dates as a day count relative to 1970-01-01.
      class Date32FieldWrapper < FieldWrapper
        UNIX_EPOCH = Date.new(1970, 1, 1)

        # @param value [#to_date, String] strings are parsed with Date.parse
        # @return [Integer] days between the date and UNIX_EPOCH
        def cast_value(value)
          date =
            if value.respond_to?(:to_date)
              value.to_date
            else
              Date.parse(value)
            end

          (date - UNIX_EPOCH).to_i
        end

        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow::Date32ArrayBuilder.new
        end
      end

      # Stores dates as a millisecond count derived from Time#to_i / Time#usec.
      class Date64FieldWrapper < FieldWrapper
        # Not referenced inside this class; kept so existing constant lookups
        # continue to resolve.
        UNIX_EPOCH = Date.new(1970, 1, 1)

        # @param value [#to_time, String] strings are parsed with Time.parse
        # @return [Integer] whole milliseconds
        def cast_value(value)
          time =
            if value.respond_to?(:to_time)
              value.to_time
            else
              Time.parse(value)
            end

          time.to_i * 1_000 + time.usec / 1_000
        end

        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow::Date64ArrayBuilder.new
        end
      end

      require "time"
      # Stores timestamps as an integer count in the unit named by
      # field["unit"] ("second", "milli", "micro"; anything else is treated as
      # nanoseconds by #cast_value).
      class TimestampFieldWrapper < FieldWrapper
        # Fluent::EventTime, objects responding to #to_time and Strings are
        # first turned into a Time; Numeric input is returned untouched
        # (presumably already expressed in the configured unit - verify
        # against callers).
        def cast_value(value)
          value =
            if value.is_a?(Fluent::EventTime)
              Time.at(value, value.usec)
            elsif value.respond_to?(:to_time)
              value.to_time
            elsif value.is_a?(String)
              Time.parse(value)
            else
              value
            end

          return value if value.is_a?(Numeric)

          case field["unit"]
          when "second"
            value.to_i
          when "milli"
            value.to_i * 1_000 + value.usec / 1_000
          when "micro"
            value.to_i * 1_000_000 + value.usec
          else
            value.to_i * 1_000_000_000 + value.nsec
          end
        end

        # The unit is passed to Arrow as a Symbol, so "unit" must be present
        # in the field definition.
        def create_arrow_data_type
          ::Arrow::TimestampDataType.new(field["unit"].to_sym)
        end

        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
        end
      end

      # Wraps a list field; the single child wrapper describes the elements.
      class ListFieldWrapper < FieldWrapper
        # Starts a new list entry on the list builder, then feeds every
        # element through the child wrapper (whose builder is the list
        # builder's value_builder, see #create_array_builder).
        def append(value)
          if value.nil?
            @array_builder.append_null
          else
            @array_builder.append
            value.each do |v|
              @children[0].append(v)
            end
          end
        end

        def create_arrow_data_type
          ::Arrow::ListDataType.new(children[0].arrow_field)
        end

        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow::ListArrayBuilder.new(arrow_field.data_type)

          # Re-point the element wrapper at the builder owned by the list builder.
          @children.each { |c| c.create_array_builder(@array_builder.value_builder) }
        end
      end

      # Wraps a struct field; one child wrapper per entry of "fields".
      class StructFieldWrapper < FieldWrapper
        # Starts a new struct entry and appends exactly one value to every
        # child. A key missing from +value+ yields a null in that child so
        # all child builders stay the same length as the struct builder; keys
        # that are not part of the schema are ignored.
        #
        # NOTE(review): for a nil struct only the struct builder itself is
        # touched; confirm the Arrow version in use keeps the child builders
        # aligned on append_null.
        def append(value)
          if value.nil?
            @array_builder.append_null
          else
            @array_builder.append
            members = value.to_h
            @children.each { |child| child.append(members[child.name]) }
          end
        end

        def create_arrow_data_type
          ::Arrow::StructDataType.new(children.map(&:arrow_field))
        end

        def create_array_builder(from_parent = nil)
          @array_builder = from_parent || ::Arrow::StructArrayBuilder.new(arrow_field.data_type)

          # Child i writes into the struct builder's i-th field builder.
          @children.each_with_index { |c, i| c.create_array_builder(@array_builder.get_field_builder(i)) }
        end
      end
    end
  end
end
@@ -16,6 +16,7 @@
16
16
  require "arrow"
17
17
  require 'fluent/plugin/buffer'
18
18
  require 'fluent/plugin/buffer/arrow_memory_chunk'
19
+ require 'fluent/plugin/arrow/field_wrapper'
19
20
 
20
21
  module Fluent
21
22
  module Plugin
@@ -32,11 +33,11 @@ module Fluent
32
33
  super
33
34
 
34
35
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
35
- arrow_fields = @schema.map do |field|
36
- create_arrow_field(field)
36
+ @field_wrappers = @schema.each_with_object({}) do |field, h|
37
+ h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
37
38
  end
38
39
 
39
- @arrow_schema = Arrow::Schema.new(arrow_fields)
40
+ @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
40
41
  end
41
42
 
42
43
  def resume
@@ -44,29 +45,7 @@ module Fluent
44
45
  end
45
46
 
46
47
  def generate_chunk(metadata)
47
- Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
48
- end
49
-
50
- private
51
-
52
- def create_arrow_field(field)
53
- Arrow::Field.new(field["name"], create_arrow_data_type(field))
54
- end
55
-
56
- def create_arrow_data_type(field)
57
- case field["type"]
58
- when "struct"
59
- Arrow::StructDataType.new(field["fields"].map { |f| create_arrow_field(f) })
60
- when "list"
61
- Arrow::ListDataType.new(create_arrow_field(field["value_type"]))
62
- when "timestamp"
63
- Arrow::TimestampDataType.new(field["unit"].to_sym)
64
- else
65
- data_type_name = field["type"].to_s.capitalize.gsub(/\AUint/, "UInt")
66
- data_type_class_name = "#{data_type_name}DataType"
67
- data_type_class = Arrow.const_get(data_type_class_name)
68
- data_type_class.new
69
- end
48
+ Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
70
49
  end
71
50
  end
72
51
  end
@@ -15,22 +15,21 @@
15
15
 
16
16
  require 'arrow'
17
17
  require 'parquet'
18
+ require 'fluent/msgpack_factory'
18
19
  require 'fluent/plugin/buffer/chunk'
19
20
  require 'fluent/plugin/buffer/memory_chunk'
21
+ require 'fluent/plugin/arrow/field_wrapper'
20
22
 
21
23
  module Fluent
22
24
  module Plugin
23
25
  class Buffer
24
26
  class ArrowMemoryChunk < MemoryChunk
25
- def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
27
+ def initialize(metadata, schema, field_wrappers, chunk_size: 1024, format: :arrow)
26
28
  super(metadata, compress: :text)
27
29
  @schema = schema
30
+ @field_wrappers = field_wrappers
28
31
  @chunk_size = chunk_size
29
32
  @format = format
30
- @array_builders = {}
31
- @schema.fields.each do |f|
32
- @array_builders[f.name] = field_to_array_builder(f)
33
- end
34
33
  @unpacker = Fluent::MessagePackFactory.engine_factory.unpacker
35
34
  end
36
35
 
@@ -49,48 +48,28 @@ module Fluent
49
48
 
50
49
  private
51
50
 
52
- def field_to_array_builder(f)
53
- data_type_str = f.data_type.to_s
54
- if data_type_str =~ /timestamp/
55
- return Arrow::TimestampArrayBuilder.new(f.data_type)
56
- end
57
-
58
- data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
59
- array_builder_class_name = "#{data_type_name}ArrayBuilder"
60
- array_builder_class = Arrow.const_get(array_builder_class_name)
61
- if array_builder_class.method(:new).arity > 0
62
- array_builder_class.new(f.data_type)
63
- else
64
- array_builder_class.new
65
- end
66
- end
67
-
68
51
  def build_arrow_buffer_string
69
52
  count = 0
70
53
  @unpacker.feed_each(@chunk) do |record|
71
54
  count += 1
72
55
  record.each do |k, v|
73
- if v.nil?
74
- @array_builders[k].append_null
75
- else
76
- @array_builders[k].append(v)
77
- end
56
+ @field_wrappers[k].append(v)
78
57
  end
79
58
  end
80
- arrow_buf = Arrow::ResizableBuffer.new(@chunk_bytes * 1.2)
59
+ arrow_buf = ::Arrow::ResizableBuffer.new(@chunk_bytes * 1.2)
81
60
 
82
- Arrow::BufferOutputStream.open(arrow_buf) do |output|
61
+ ::Arrow::BufferOutputStream.open(arrow_buf) do |output|
83
62
  if @format == :parquet
84
63
  Parquet::ArrowFileWriter.open(@schema, output) do |writer|
85
64
  columns = @schema.fields.map do |f|
86
- Arrow::Column.new(f, @array_builders[f.name].finish)
65
+ ::Arrow::Column.new(f, @field_wrappers[f.name].finish)
87
66
  end
88
- table = Arrow::Table.new(@schema, columns)
67
+ table = ::Arrow::Table.new(@schema, columns)
89
68
  writer.write_table(table, @chunk_size)
90
69
  end
91
70
  else
92
- Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
93
- record_batch = Arrow::RecordBatch.new(@schema, count, @array_builders.values.map(&:finish))
71
+ ::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
72
+ record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
94
73
  writer.write_record_batch(record_batch)
95
74
  end
96
75
  end
@@ -5,12 +5,17 @@ require "fluent/plugin/buffer/arrow_memory_chunk"
5
5
  class ArrowMemoryChunkTest < Test::Unit::TestCase
6
6
  setup do
7
7
  @fields = [
8
- Arrow::Field.new("key1", :uint64),
9
- Arrow::Field.new("key2", :double),
10
- Arrow::Field.new("key3", Arrow::TimestampDataType.new(:second)),
8
+ ::Arrow::Field.new("key1", :uint64),
9
+ ::Arrow::Field.new("key2", :double),
10
+ ::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
11
11
  ]
12
+ field_wrappers = {
13
+ "key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
14
+ "key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
15
+ "key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
16
+ }
12
17
  @schema = Arrow::Schema.new(@fields)
13
- @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
18
+ @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
14
19
  end
15
20
 
16
21
  test "can #read" do
@@ -18,44 +23,41 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
18
23
  d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
19
24
  data = [d1.to_msgpack, d2.to_msgpack]
20
25
  @c.append(data)
21
- Arrow::BufferInputStream.open(Arrow::Buffer.new(@c.read)) do |input|
22
- reader = Arrow::RecordBatchFileReader.new(input)
26
+ ::Arrow::BufferInputStream.open(::Arrow::Buffer.new(@c.read)) do |input|
27
+ reader = ::Arrow::RecordBatchFileReader.new(input)
23
28
 
24
29
  reader.each do |record_batch|
25
30
  assert { record_batch.n_rows == 2 }
26
31
 
27
- assert { record_batch.find_column(@fields[0].name).class == Arrow::UInt64Array }
32
+ assert { record_batch.find_column(@fields[0].name).class == ::Arrow::UInt64Array }
28
33
  assert { record_batch.find_column(@fields[0].name).values == [123, 124] }
29
34
  end
30
35
  end
31
36
  end
32
37
 
33
38
  test "can #write_to" do
34
- d1 = {"key1" => 123, "key2" => 10.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
35
- d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(Time.now)}
39
+ time = Time.now
40
+ d1 = {"key1" => 123, "key2" => 10.1234, "key3" => Fluent::EventTime.from_time(time)}
41
+ d2 = {"key1" => 124, "key2" => 11.1234, "key3" => Fluent::EventTime.from_time(time)}
36
42
  data = [d1.to_msgpack, d2.to_msgpack]
37
43
  @c.append(data)
38
44
  Tempfile.create do |tf|
39
45
  @c.write_to(tf)
40
46
  tf.flush
41
47
 
42
- Arrow::MemoryMappedInputStream.open(tf.path) do |input|
43
- reader = Arrow::RecordBatchFileReader.new(input)
48
+ ::Arrow::MemoryMappedInputStream.open(tf.path) do |input|
49
+ reader = ::Arrow::RecordBatchFileReader.new(input)
44
50
  reader.each_with_index do |record_batch, i|
45
51
  reader.each do |record_batch|
46
52
  assert { record_batch.n_rows == 2 }
47
53
 
48
- assert { record_batch.find_column(@fields[0].name).class == Arrow::UInt64Array }
54
+ assert { record_batch.find_column(@fields[0].name).class == ::Arrow::UInt64Array }
49
55
  assert { record_batch.find_column(@fields[0].name).values == [123, 124] }
56
+ assert { record_batch.find_column(@fields[1].name).values == [10.1234, 11.1234] }
57
+ assert { record_batch.find_column(@fields[2].name)[0].to_i == time.to_i }
50
58
  end
51
59
  end
52
60
  end
53
61
  end
54
62
  end
55
-
56
- private
57
-
58
- def create_driver(conf)
59
- Fluent::Test::Driver::Formatter.new(Fluent::Plugin::ArrowFormatter).configure(conf)
60
- end
61
63
  end
@@ -0,0 +1,145 @@
1
+ require "helper"
2
+ require "fluent/plugin/arrow/field_wrapper"
3
+
4
# Exercises Fluent::Plugin::Arrow::FieldWrapper: which wrapper / Arrow field /
# array builder .build produces for scalar, list, struct and nested schema
# entries, and what #append + #finish yield for timestamp, date and nested
# values.
class ArrowFieldWrapperTest < Test::Unit::TestCase
  test ".build (string)" do
    assert_wrapper(build_wrapper({"name" => "key1", "type" => "string"}), "key1", "string")
  end

  test ".build (timestamp)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
    assert_wrapper(wrapper, "key1", "timestamp")
  end

  test ".build (list)" do
    list = build_wrapper({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})

    assert_wrapper(list, "key1", "list", Arrow::ListDataType, Arrow::ListArrayBuilder)
    assert_wrapper(list.children[0], "value", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)
  end

  test ".build (struct)" do
    struct = build_wrapper({"name" => "key1", "type" => "struct", "fields" => [
      {"name" => "foo1", "type" => "string"},
      {"name" => "foo2", "type" => "uint64"},
      {"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
    ]})

    assert_wrapper(struct, "key1", "struct", Arrow::StructDataType, Arrow::StructArrayBuilder)
    assert_wrapper(struct.children[0], "foo1", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)
    assert_wrapper(struct.children[1], "foo2", "uint64", Arrow::UInt64DataType, Arrow::UInt64ArrayBuilder)
    assert_wrapper(struct.children[2], "foo3", "timestamp", Arrow::TimestampDataType, Arrow::TimestampArrayBuilder)
  end

  test ".build (nested)" do
    struct = build_wrapper(nested_definition)
    inner_list = struct.children[1]

    assert_wrapper(struct, "key1", "struct", Arrow::StructDataType, Arrow::StructArrayBuilder)
    assert_wrapper(struct.children[0], "foo1", "string", Arrow::StringDataType, Arrow::StringArrayBuilder)
    assert_wrapper(inner_list, "foo2", "list", Arrow::ListDataType, Arrow::ListArrayBuilder)
    assert_wrapper(inner_list.children[0], "value", "uint64", Arrow::UInt64DataType, Arrow::UInt64ArrayBuilder)
  end

  test "#append (timestamp)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
    now = Time.now
    wrapper.append(now)

    stored = wrapper.finish[0]
    assert_kind_of Time, stored
    assert_equal now.to_i, stored.to_i
  end

  test "#append (date32)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "date32"})
    today = Date.today
    wrapper.append(today)

    stored = wrapper.finish[0]
    assert_kind_of Date, stored
    assert_equal today, stored
  end

  test "#append (date64)" do
    wrapper = build_wrapper({"name" => "key1", "type" => "date64"})
    today = Date.today
    wrapper.append(today)

    stored = wrapper.finish[0]
    assert_kind_of DateTime, stored
    assert_equal today, stored.to_date
  end

  test "#append (nested)" do
    wrapper = build_wrapper(nested_definition)
    wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
    wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})

    strings, lists = wrapper.finish.fields

    assert_kind_of Arrow::StringArray, strings
    %w[rec1 rec2].each_with_index do |expected, row|
      assert_equal expected, strings[row]
    end

    # One UInt64Array per appended record, compared element by element.
    [[1, 2, 3], [4, 5]].each_with_index do |expected, row|
      numbers = lists.get_value(row)
      assert_kind_of Arrow::UInt64Array, numbers
      expected.each_with_index do |number, index|
        assert_equal number, numbers[index]
      end
    end
  end

  private

  # Shorthand for the factory under test.
  def build_wrapper(definition)
    Fluent::Plugin::Arrow::FieldWrapper.build(definition)
  end

  # Struct with a string member and a list-of-uint64 member, shared by the
  # nested .build and #append tests.
  def nested_definition
    {"name" => "key1", "type" => "struct", "fields" => [
      {"name" => "foo1", "type" => "string"},
      {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
    ]}
  end

  # Checks name, type and the Arrow::Field of a wrapper; the data type and
  # builder classes are only checked when given.
  def assert_wrapper(wrapper, name, type, data_type_class = nil, builder_class = nil)
    assert_equal name, wrapper.name
    assert_equal type, wrapper.type
    assert_kind_of Arrow::Field, wrapper.arrow_field
    assert_kind_of data_type_class, wrapper.arrow_field.data_type if data_type_class
    assert_kind_of builder_class, wrapper.array_builder if builder_class
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-28 00:00:00.000000000 Z
11
+ date: 2018-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -113,13 +113,14 @@ files:
113
113
  - README.md
114
114
  - Rakefile
115
115
  - fluent-plugin-arrow.gemspec
116
+ - lib/fluent/plugin/arrow/field_wrapper.rb
116
117
  - lib/fluent/plugin/buf_arrow_memory.rb
117
118
  - lib/fluent/plugin/buffer/arrow_memory_chunk.rb
118
119
  - lib/fluent/plugin/formatter_arrow.rb
119
120
  - test/helper.rb
120
121
  - test/plugin/test_buf_arrow_memory.rb
121
122
  - test/plugin/test_buffer_arrow_memory_chunk.rb
122
- - test/plugin/test_formatter_arrow.rb
123
+ - test/plugin/test_field_wrapper.rb
123
124
  homepage: https://github.com/joker1007/fluent-plugin-arrow
124
125
  licenses:
125
126
  - Apache-2.0
@@ -148,4 +149,4 @@ test_files:
148
149
  - test/helper.rb
149
150
  - test/plugin/test_buf_arrow_memory.rb
150
151
  - test/plugin/test_buffer_arrow_memory_chunk.rb
151
- - test/plugin/test_formatter_arrow.rb
152
+ - test/plugin/test_field_wrapper.rb
@@ -1,14 +0,0 @@
1
- require "helper"
2
- require "fluent/plugin/formatter_arrow.rb"
3
-
4
- class ArrowFormatterTest < Test::Unit::TestCase
5
- setup do
6
- Fluent::Test.setup
7
- end
8
-
9
- private
10
-
11
- def create_driver(conf)
12
- Fluent::Test::Driver::Formatter.new(Fluent::Plugin::ArrowFormatter).configure(conf)
13
- end
14
- end