fluent-plugin-arrow 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9325c65f679d81a53d202a11a842a6c3a82b1cc3d600203064fc1329eeb3ded2
4
- data.tar.gz: 134a1bb09054b3feeed480efb0d7df558ad8a8131dbe3951d73395511ed2e843
3
+ metadata.gz: 835c64e5e2e22f41142ec79b7ba525e7d47ca2fec9a70bec9daa579af86a6f2b
4
+ data.tar.gz: 8df48380e8a2f3eab3762ff0b91f88cba787cd0ac46cb1b9f02e69dfe99926eb
5
5
  SHA512:
6
- metadata.gz: c72b02f95969d7dd8cdbfe525b579afc23a16f8fae1fdd72ac095479f94514c890287f7b3c41822888965a4f9554b33b5ed49e23d92401cefbfee3f8f8feb9b0
7
- data.tar.gz: 18eaeb2135ecc0339e8986db79545a772aeeef0a2499119bfd3b2194beb125c5be518d6bb86dd0b5caaae1c1d842f98926557219c8fd6586fa04e0ac51b34e2c
6
+ metadata.gz: 1fff346a50875d7b554e067bf9b50dc64764059101b30cc9bdbadc515e45ab75841a35170f6339bea1b20d3e8365b8a7b617039757958e9bbf4f90b0c1e28348
7
+ data.tar.gz: bc034add73f3f31df0a6c1b6c24cea15eef0821f3be1062dbd940104cce178d105065b24b332eddc661faeaba9905bbc68f830c54647b10c68000f1bca4361a3
@@ -0,0 +1,2 @@
1
+ .bundle
2
+ .git
@@ -0,0 +1,10 @@
1
+ sudo: required
2
+
3
+ services:
4
+ - docker
5
+
6
+ install:
7
+ - docker build -t joker1007/fluent-plugin-arrow .
8
+
9
+ script:
10
+ - docker run joker1007/fluent-plugin-arrow /bin/sh -c "bundle exec rake"
@@ -0,0 +1,14 @@
1
+ FROM ruby:2.6-stretch
2
+
3
+ RUN \
4
+ apt update && \
5
+ apt install -y apt-transport-https lsb-release && \
6
+ wget -O /usr/share/keyrings/apache-arrow-keyring.gpg \
7
+ https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-keyring.gpg && \
8
+ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/apache-arrow-keyring.gpg] https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/ $(lsb_release --codename --short) main" > \
9
+ /etc/apt/sources.list.d/apache-arrow.list
10
+
11
+ RUN mkdir /app
12
+ WORKDIR /app
13
+ COPY . /app
14
+ RUN apt update && bundle install
data/README.md CHANGED
@@ -49,10 +49,10 @@ You can generate configuration template:
49
49
  {"name": "key1", "type": "string"},
50
50
  {"name": "key2", "type": "uint64"},
51
51
  {"name": "key3", "type": "timestamp", "unit": "milli"},
52
- {"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
52
+ {"name": "key4", "type": "list", "field": {"name": "value", "type": "uint64"}},
53
53
  {"name": "key5", "type": "struct", "fields": [
54
54
  {"name": "bar1", "type": "uint64"},
55
- {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
55
+ {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}
56
56
  ]}
57
57
  ]
58
58
  </buffer>
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-arrow"
6
- spec.version = "0.0.3"
6
+ spec.version = "0.0.4"
7
7
  spec.authors = ["joker1007"]
8
8
  spec.email = ["kakyoin.hierophant@gmail.com"]
9
9
 
@@ -24,6 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "rake", "~> 12.0"
25
25
  spec.add_development_dependency "test-unit", "~> 3.0"
26
26
  spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
27
- spec.add_runtime_dependency "red-arrow", ">= 0.10"
27
+ spec.add_runtime_dependency "red-arrow", ">= 0.12"
28
28
  spec.add_runtime_dependency "red-parquet"
29
29
  end
@@ -17,7 +17,6 @@
17
17
  require "arrow"
18
18
  require 'fluent/plugin/buf_file'
19
19
  require 'fluent/plugin/buffer/arrow_file_chunk'
20
- require 'fluent/plugin/arrow/field_wrapper'
21
20
 
22
21
  module Fluent
23
22
  module Plugin
@@ -34,11 +33,7 @@ module Fluent
34
33
  super
35
34
 
36
35
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}]
37
- @field_wrappers = @schema.each_with_object({}) do |field, h|
38
- h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
39
- end
40
-
41
- @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
36
+ @arrow_schema = ::Arrow::Schema.new(@schema)
42
37
  end
43
38
 
44
39
  def resume
@@ -48,9 +43,9 @@ module Fluent
48
43
  def generate_chunk(metadata)
49
44
  # FileChunk generates real path with unique_id
50
45
  if @file_permission
51
- chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
46
+ chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
52
47
  else
53
- chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
48
+ chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
54
49
  end
55
50
 
56
51
  log.debug "Created new chunk", chunk_id: dump_unique_id_hex(chunk.unique_id), metadata: metadata
@@ -16,7 +16,6 @@
16
16
  require "arrow"
17
17
  require 'fluent/plugin/buffer'
18
18
  require 'fluent/plugin/buffer/arrow_memory_chunk'
19
- require 'fluent/plugin/arrow/field_wrapper'
20
19
 
21
20
  module Fluent
22
21
  module Plugin
@@ -33,11 +32,7 @@ module Fluent
33
32
  super
34
33
 
35
34
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}]
36
- @field_wrappers = @schema.each_with_object({}) do |field, h|
37
- h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
38
- end
39
-
40
- @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
35
+ @arrow_schema = ::Arrow::Schema.new(@schema)
41
36
  end
42
37
 
43
38
  def resume
@@ -45,7 +40,7 @@ module Fluent
45
40
  end
46
41
 
47
42
  def generate_chunk(metadata)
48
- Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
43
+ Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
49
44
  end
50
45
  end
51
46
  end
@@ -10,32 +10,11 @@ module Fluent
10
10
  end
11
11
 
12
12
  def build_arrow_buffer_string
13
- count = 0
14
- each_record do |record|
15
- count += 1
16
- record.each do |k, v|
17
- @field_wrappers[k].append(v)
18
- end
19
- end
13
+ record_batch = ::Arrow::RecordBatch.new(@schema, each_record)
20
14
  arrow_buf = ::Arrow::ResizableBuffer.new(bytesize * 1.2)
21
-
22
- ::Arrow::BufferOutputStream.open(arrow_buf) do |output|
23
- if @format == :parquet
24
- Parquet::ArrowFileWriter.open(@schema, output) do |writer|
25
- columns = @schema.fields.map do |f|
26
- ::Arrow::Column.new(f, @field_wrappers[f.name].finish)
27
- end
28
- table = ::Arrow::Table.new(@schema, columns)
29
- writer.write_table(table, @chunk_size)
30
- end
31
- else
32
- ::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
33
- record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
34
- writer.write_record_batch(record_batch)
35
- end
36
- end
37
- end
38
-
15
+ record_batch.to_table.save(arrow_buf,
16
+ format: @format,
17
+ chunk_size: @chunk_size)
39
18
  arrow_buf.data.to_s
40
19
  end
41
20
  end
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
19
19
  require 'fluent/plugin/buffer/chunk'
20
20
  require 'fluent/plugin/buffer/file_chunk'
21
21
  require 'fluent/plugin/buffer/arrow_buffer_string_builder'
22
- require 'fluent/plugin/arrow/field_wrapper'
23
22
 
24
23
  module Fluent
25
24
  module Plugin
@@ -27,10 +26,9 @@ module Fluent
27
26
  class ArrowFileChunk < FileChunk
28
27
  include ArrowBufferStringBuilder
29
28
 
30
- def initialize(metadata, path, mode, schema, field_wrappers, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
29
+ def initialize(metadata, path, mode, schema, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
31
30
  super(metadata, path, mode, perm: perm, compress: :text)
32
31
  @schema = schema
33
- @field_wrappers = field_wrappers
34
32
  @chunk_size = chunk_size
35
33
  @format = format
36
34
  end
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
19
19
  require 'fluent/plugin/buffer/chunk'
20
20
  require 'fluent/plugin/buffer/memory_chunk'
21
21
  require 'fluent/plugin/buffer/arrow_buffer_string_builder'
22
- require 'fluent/plugin/arrow/field_wrapper'
23
22
 
24
23
  module Fluent
25
24
  module Plugin
@@ -27,10 +26,9 @@ module Fluent
27
26
  class ArrowMemoryChunk < MemoryChunk
28
27
  include ArrowBufferStringBuilder
29
28
 
30
- def initialize(metadata, schema, field_wrappers, chunk_size: 1024, format: :arrow)
29
+ def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
31
30
  super(metadata, compress: :text)
32
31
  @schema = schema
33
- @field_wrappers = field_wrappers
34
32
  @chunk_size = chunk_size
35
33
  @format = format
36
34
  end
@@ -25,8 +25,8 @@ class ArrowMemoryBufferTest < Test::Unit::TestCase
25
25
  {"name": "foo1", "type": "uint64"},
26
26
  {"name": "foo2", "type": "string"},
27
27
  {"name": "foo3", "type": "timestamp", "unit": "milli"},
28
- {"name": "foo4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
29
- {"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}]}
28
+ {"name": "foo4", "type": "list", "field": {"name": "value", "type": "uint64"}},
29
+ {"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}]}
30
30
  ]
31
31
  ]
32
32
  buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
@@ -9,13 +9,8 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
9
9
  ::Arrow::Field.new("key2", :double),
10
10
  ::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
11
11
  ]
12
- field_wrappers = {
13
- "key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
14
- "key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
15
- "key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
16
- }
17
12
  @schema = Arrow::Schema.new(@fields)
18
- @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
13
+ @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
19
14
  end
20
15
 
21
16
  test "can #read" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -78,14 +78,14 @@ dependencies:
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '0.10'
81
+ version: '0.12'
82
82
  type: :runtime
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '0.10'
88
+ version: '0.12'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: red-parquet
91
91
  requirement: !ruby/object:Gem::Requirement
@@ -107,13 +107,15 @@ executables: []
107
107
  extensions: []
108
108
  extra_rdoc_files: []
109
109
  files:
110
+ - ".dockerignore"
110
111
  - ".gitignore"
112
+ - ".travis.yml"
113
+ - Dockerfile
111
114
  - Gemfile
112
115
  - LICENSE
113
116
  - README.md
114
117
  - Rakefile
115
118
  - fluent-plugin-arrow.gemspec
116
- - lib/fluent/plugin/arrow/field_wrapper.rb
117
119
  - lib/fluent/plugin/buf_arrow_file.rb
118
120
  - lib/fluent/plugin/buf_arrow_memory.rb
119
121
  - lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb
@@ -123,7 +125,6 @@ files:
123
125
  - test/helper.rb
124
126
  - test/plugin/test_buf_arrow_memory.rb
125
127
  - test/plugin/test_buffer_arrow_memory_chunk.rb
126
- - test/plugin/test_field_wrapper.rb
127
128
  homepage: https://github.com/joker1007/fluent-plugin-arrow
128
129
  licenses:
129
130
  - Apache-2.0
@@ -143,8 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
143
144
  - !ruby/object:Gem::Version
144
145
  version: '0'
145
146
  requirements: []
146
- rubyforge_project:
147
- rubygems_version: 2.7.8
147
+ rubygems_version: 3.0.1
148
148
  signing_key:
149
149
  specification_version: 4
150
150
  summary: Apache Arrow formatter plugin for fluentd.
@@ -152,4 +152,3 @@ test_files:
152
152
  - test/helper.rb
153
153
  - test/plugin/test_buf_arrow_memory.rb
154
154
  - test/plugin/test_buffer_arrow_memory_chunk.rb
155
- - test/plugin/test_field_wrapper.rb
@@ -1,259 +0,0 @@
1
- require "arrow"
2
-
3
- module Fluent
4
- module Plugin
5
- module Arrow
6
- class FieldWrapper
7
- class << self
8
- def build(field)
9
- case field["type"]
10
- when "string"
11
- StringFieldWrapper.new(field)
12
- when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
13
- IntegerFieldWrapper.new(field)
14
- when "float", "double"
15
- FloatFieldWrapper.new(field)
16
- when "boolean"
17
- BooleanFieldWrapper.new(field)
18
- when "date32"
19
- Date32FieldWrapper.new(field)
20
- when "date64"
21
- Date64FieldWrapper.new(field)
22
- when "timestamp"
23
- TimestampFieldWrapper.new(field)
24
- when "list"
25
- ListFieldWrapper.new(field)
26
- when "struct"
27
- StructFieldWrapper.new(field)
28
- else
29
- raise "Unsupported data type"
30
- end
31
- end
32
- end
33
-
34
- attr_reader :field, :name, :type, :children, :arrow_field, :array_builder
35
-
36
- def initialize(field)
37
- @field = field
38
- @name = field["name"]
39
- @type = field["type"]
40
- @children = []
41
-
42
- field["value_type"]&.tap do |f|
43
- @children << self.class.build(f)
44
- end
45
-
46
- field["fields"]&.each do |f|
47
- @children << self.class.build(f)
48
- end
49
-
50
- create_arrow_field
51
- create_array_builder
52
- end
53
-
54
- def append(value)
55
- if value.nil?
56
- @array_builder.append_null
57
- else
58
- @array_builder.append(cast_value(value))
59
- end
60
- end
61
-
62
- def finish
63
- @array_builder.finish
64
- end
65
-
66
- def create_arrow_field
67
- @arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
68
- end
69
-
70
- def create_arrow_data_type
71
- data_type_name = type.to_s.capitalize.gsub(/\AUint/, "UInt")
72
- data_type_class_name = "#{data_type_name}DataType"
73
- data_type_class = ::Arrow.const_get(data_type_class_name)
74
- data_type_class.new
75
- end
76
-
77
- def create_array_builder(from_parent = nil)
78
- if from_parent
79
- @array_builder = from_parent
80
- else
81
- data_type_str = arrow_field.data_type.to_s
82
- data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
83
- array_builder_class_name = "#{data_type_name}ArrayBuilder"
84
- array_builder_class = ::Arrow.const_get(array_builder_class_name)
85
- @array_builder = array_builder_class.new
86
- end
87
- end
88
-
89
- def cast_value(value)
90
- raise NotImplementedError
91
- end
92
- end
93
-
94
- class StringFieldWrapper < FieldWrapper
95
- def cast_value(value)
96
- value.to_s
97
- end
98
- end
99
-
100
- class IntegerFieldWrapper < FieldWrapper
101
- def cast_value(value)
102
- value.to_i
103
- end
104
- end
105
-
106
- class FloatFieldWrapper < FieldWrapper
107
- def cast_value(value)
108
- value.to_f
109
- end
110
- end
111
-
112
- class BooleanFieldWrapper < FieldWrapper
113
- def cast_value(value)
114
- !!value
115
- end
116
- end
117
-
118
- require "date"
119
- class Date32FieldWrapper < FieldWrapper
120
- UNIX_EPOCH = Date.new(1970, 1, 1)
121
- def cast_value(value)
122
- date =
123
- if value.respond_to?(:to_date)
124
- value.to_date
125
- else
126
- Date.parse(value)
127
- end
128
-
129
- (date - UNIX_EPOCH).to_i
130
- end
131
-
132
- def create_array_builder(from_parent = nil)
133
- if from_parent
134
- @array_builder = from_parent
135
- else
136
- @array_builder = ::Arrow::Date32ArrayBuilder.new
137
- end
138
- end
139
- end
140
-
141
- class Date64FieldWrapper < FieldWrapper
142
- UNIX_EPOCH = Date.new(1970, 1, 1)
143
- def cast_value(value)
144
- time =
145
- if value.respond_to?(:to_time)
146
- value.to_time
147
- else
148
- Time.parse(value)
149
- end
150
-
151
- time.to_i * 1_000 + time.usec / 1_000
152
- end
153
-
154
- def create_array_builder(from_parent = nil)
155
- if from_parent
156
- @array_builder = from_parent
157
- else
158
- @array_builder = ::Arrow::Date64ArrayBuilder.new
159
- end
160
- end
161
- end
162
-
163
- require "time"
164
- class TimestampFieldWrapper < FieldWrapper
165
- def cast_value(value)
166
- value =
167
- if value.is_a?(Fluent::EventTime)
168
- Time.at(value, value.usec)
169
- elsif value.respond_to?(:to_time)
170
- value.to_time
171
- elsif value.is_a?(String)
172
- Time.parse(value)
173
- else
174
- value
175
- end
176
-
177
- return value if value.is_a?(Numeric)
178
-
179
- case field["unit"]
180
- when "second"
181
- value.to_i
182
- when "milli"
183
- value.to_i * 1_000 + value.usec / 1_000
184
- when "micro"
185
- value.to_i * 1_000_000 + value.usec
186
- else
187
- value.to_i * 1_000_000_000 + value.nsec
188
- end
189
- end
190
-
191
- def create_arrow_data_type
192
- ::Arrow::TimestampDataType.new(field["unit"].to_sym)
193
- end
194
-
195
- def create_array_builder(from_parent = nil)
196
- if from_parent
197
- @array_builder = from_parent
198
- else
199
- @array_builder = ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
200
- end
201
- end
202
- end
203
-
204
- class ListFieldWrapper < FieldWrapper
205
- def append(value)
206
- if value.nil?
207
- @array_builder.append_null
208
- else
209
- @array_builder.append
210
- value.each do |v|
211
- @children[0].append(v)
212
- end
213
- end
214
- end
215
-
216
- def create_arrow_data_type
217
- ::Arrow::ListDataType.new(children[0].arrow_field)
218
- end
219
-
220
- def create_array_builder(from_parent = nil)
221
- if from_parent
222
- @array_builder = from_parent
223
- else
224
- @array_builder = ::Arrow::ListArrayBuilder.new(arrow_field.data_type)
225
- end
226
-
227
- @children.each { |c| c.create_array_builder(@array_builder.value_builder) }
228
- end
229
- end
230
-
231
- class StructFieldWrapper < FieldWrapper
232
- def append(value)
233
- if value.nil?
234
- @array_builder.append_null
235
- else
236
- @array_builder.append
237
- value.each do |k, v|
238
- @children.find { |c| c.name == k }.append(v)
239
- end
240
- end
241
- end
242
-
243
- def create_arrow_data_type
244
- ::Arrow::StructDataType.new(children.map(&:arrow_field))
245
- end
246
-
247
- def create_array_builder(from_parent = nil)
248
- if from_parent
249
- @array_builder = from_parent
250
- else
251
- @array_builder = ::Arrow::StructArrayBuilder.new(arrow_field.data_type)
252
- end
253
-
254
- @children.each_with_index { |c, i| c.create_array_builder(@array_builder.get_field_builder(i)) }
255
- end
256
- end
257
- end
258
- end
259
- end
@@ -1,145 +0,0 @@
1
- require "helper"
2
- require "fluent/plugin/arrow/field_wrapper"
3
-
4
- class ArrowFieldWrapperTest < Test::Unit::TestCase
5
- test ".build (string)" do
6
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "string"})
7
- assert_equal "key1", field_wrapper.name
8
- assert_equal "string", field_wrapper.type
9
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
10
- end
11
-
12
- test ".build (timestamp)" do
13
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
14
- assert_equal "key1", field_wrapper.name
15
- assert_equal "timestamp", field_wrapper.type
16
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
17
- end
18
-
19
- test ".build (list)" do
20
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})
21
- assert_equal "key1", field_wrapper.name
22
- assert_equal "list", field_wrapper.type
23
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
24
- assert_kind_of Arrow::ListDataType, field_wrapper.arrow_field.data_type
25
- assert_kind_of Arrow::ListArrayBuilder, field_wrapper.array_builder
26
-
27
- assert_equal "value", field_wrapper.children[0].name
28
- assert_equal "string", field_wrapper.children[0].type
29
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
30
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
31
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
32
- end
33
-
34
- test ".build (struct)" do
35
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
36
- {"name" => "foo1", "type" => "string"},
37
- {"name" => "foo2", "type" => "uint64"},
38
- {"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
39
- ]})
40
- assert_equal "key1", field_wrapper.name
41
- assert_equal "struct", field_wrapper.type
42
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
43
- assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
44
- assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
45
-
46
- assert_equal "foo1", field_wrapper.children[0].name
47
- assert_equal "string", field_wrapper.children[0].type
48
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
49
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
50
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
51
-
52
- assert_equal "foo2", field_wrapper.children[1].name
53
- assert_equal "uint64", field_wrapper.children[1].type
54
- assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
55
- assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].arrow_field.data_type
56
- assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].array_builder
57
-
58
- assert_equal "foo3", field_wrapper.children[2].name
59
- assert_equal "timestamp", field_wrapper.children[2].type
60
- assert_kind_of Arrow::Field, field_wrapper.children[2].arrow_field
61
- assert_kind_of Arrow::TimestampDataType, field_wrapper.children[2].arrow_field.data_type
62
- assert_kind_of Arrow::TimestampArrayBuilder, field_wrapper.children[2].array_builder
63
- end
64
-
65
- test ".build (nested)" do
66
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
67
- {"name" => "foo1", "type" => "string"},
68
- {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
69
- ]})
70
- assert_equal "key1", field_wrapper.name
71
- assert_equal "struct", field_wrapper.type
72
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
73
- assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
74
- assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
75
-
76
- assert_equal "foo1", field_wrapper.children[0].name
77
- assert_equal "string", field_wrapper.children[0].type
78
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
79
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
80
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
81
-
82
- assert_equal "foo2", field_wrapper.children[1].name
83
- assert_equal "list", field_wrapper.children[1].type
84
- assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
85
- assert_kind_of Arrow::ListDataType, field_wrapper.children[1].arrow_field.data_type
86
- assert_kind_of Arrow::ListArrayBuilder, field_wrapper.children[1].array_builder
87
-
88
- assert_equal "value", field_wrapper.children[1].children[0].name
89
- assert_equal "uint64", field_wrapper.children[1].children[0].type
90
- assert_kind_of Arrow::Field, field_wrapper.children[1].children[0].arrow_field
91
- assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].children[0].arrow_field.data_type
92
- assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].children[0].array_builder
93
- end
94
-
95
- test "#append (timestamp)" do
96
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
97
- time = Time.now
98
- field_wrapper.append(time)
99
- timestamp_array = field_wrapper.finish
100
- assert_kind_of Time, timestamp_array[0]
101
- assert_equal time.to_i, timestamp_array[0].to_i
102
- end
103
-
104
- test "#append (date32)" do
105
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date32"})
106
- date = Date.today
107
- field_wrapper.append(date)
108
- date_array = field_wrapper.finish
109
- assert_kind_of Date, date_array[0]
110
- assert_equal date, date_array[0]
111
- end
112
-
113
- test "#append (date64)" do
114
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date64"})
115
- date = Date.today
116
- field_wrapper.append(date)
117
- date_array = field_wrapper.finish
118
- assert_kind_of DateTime, date_array[0]
119
- assert_equal date, date_array[0].to_date
120
- end
121
-
122
- test "#append (nested)" do
123
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
124
- {"name" => "foo1", "type" => "string"},
125
- {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
126
- ]})
127
-
128
- field_wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
129
- field_wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})
130
-
131
- struct_array = field_wrapper.finish
132
- assert_kind_of Arrow::StringArray, struct_array.fields[0]
133
- assert_equal "rec1", struct_array.fields[0][0]
134
- assert_equal "rec2", struct_array.fields[0][1]
135
-
136
- assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(0)
137
- assert_equal 1, struct_array.fields[1].get_value(0)[0]
138
- assert_equal 2, struct_array.fields[1].get_value(0)[1]
139
- assert_equal 3, struct_array.fields[1].get_value(0)[2]
140
-
141
- assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(1)
142
- assert_equal 4, struct_array.fields[1].get_value(1)[0]
143
- assert_equal 5, struct_array.fields[1].get_value(1)[1]
144
- end
145
- end