fluent-plugin-arrow 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9325c65f679d81a53d202a11a842a6c3a82b1cc3d600203064fc1329eeb3ded2
4
- data.tar.gz: 134a1bb09054b3feeed480efb0d7df558ad8a8131dbe3951d73395511ed2e843
3
+ metadata.gz: 835c64e5e2e22f41142ec79b7ba525e7d47ca2fec9a70bec9daa579af86a6f2b
4
+ data.tar.gz: 8df48380e8a2f3eab3762ff0b91f88cba787cd0ac46cb1b9f02e69dfe99926eb
5
5
  SHA512:
6
- metadata.gz: c72b02f95969d7dd8cdbfe525b579afc23a16f8fae1fdd72ac095479f94514c890287f7b3c41822888965a4f9554b33b5ed49e23d92401cefbfee3f8f8feb9b0
7
- data.tar.gz: 18eaeb2135ecc0339e8986db79545a772aeeef0a2499119bfd3b2194beb125c5be518d6bb86dd0b5caaae1c1d842f98926557219c8fd6586fa04e0ac51b34e2c
6
+ metadata.gz: 1fff346a50875d7b554e067bf9b50dc64764059101b30cc9bdbadc515e45ab75841a35170f6339bea1b20d3e8365b8a7b617039757958e9bbf4f90b0c1e28348
7
+ data.tar.gz: bc034add73f3f31df0a6c1b6c24cea15eef0821f3be1062dbd940104cce178d105065b24b332eddc661faeaba9905bbc68f830c54647b10c68000f1bca4361a3
@@ -0,0 +1,2 @@
1
+ .bundle
2
+ .git
@@ -0,0 +1,10 @@
1
+ sudo: required
2
+
3
+ services:
4
+ - docker
5
+
6
+ install:
7
+ - docker build -t joker1007/fluent-plugin-arrow .
8
+
9
+ script:
10
+ - docker run joker1007/fluent-plugin-arrow /bin/sh -c "bundle exec rake"
@@ -0,0 +1,14 @@
1
+ FROM ruby:2.6-stretch
2
+
3
+ RUN \
4
+ apt update && \
5
+ apt install -y apt-transport-https lsb-release && \
6
+ wget -O /usr/share/keyrings/apache-arrow-keyring.gpg \
7
+ https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-keyring.gpg && \
8
+ echo "deb [arch=amd64 signed-by=/usr/share/keyrings/apache-arrow-keyring.gpg] https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/ $(lsb_release --codename --short) main" > \
9
+ /etc/apt/sources.list.d/apache-arrow.list
10
+
11
+ RUN mkdir /app
12
+ WORKDIR /app
13
+ COPY . /app
14
+ RUN apt update && bundle install
data/README.md CHANGED
@@ -49,10 +49,10 @@ You can generate configuration template:
49
49
  {"name": "key1", "type": "string"},
50
50
  {"name": "key2", "type": "uint64"},
51
51
  {"name": "key3", "type": "timestamp", "unit": "milli"},
52
- {"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
52
+ {"name": "key4", "type": "list", "field": {"name": "value", "type": "uint64"}},
53
53
  {"name": "key5", "type": "struct", "fields": [
54
54
  {"name": "bar1", "type": "uint64"},
55
- {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
55
+ {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}
56
56
  ]}
57
57
  ]
58
58
  </buffer>
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-arrow"
6
- spec.version = "0.0.3"
6
+ spec.version = "0.0.4"
7
7
  spec.authors = ["joker1007"]
8
8
  spec.email = ["kakyoin.hierophant@gmail.com"]
9
9
 
@@ -24,6 +24,6 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "rake", "~> 12.0"
25
25
  spec.add_development_dependency "test-unit", "~> 3.0"
26
26
  spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
27
- spec.add_runtime_dependency "red-arrow", ">= 0.10"
27
+ spec.add_runtime_dependency "red-arrow", ">= 0.12"
28
28
  spec.add_runtime_dependency "red-parquet"
29
29
  end
@@ -17,7 +17,6 @@
17
17
  require "arrow"
18
18
  require 'fluent/plugin/buf_file'
19
19
  require 'fluent/plugin/buffer/arrow_file_chunk'
20
- require 'fluent/plugin/arrow/field_wrapper'
21
20
 
22
21
  module Fluent
23
22
  module Plugin
@@ -34,11 +33,7 @@ module Fluent
34
33
  super
35
34
 
36
35
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
37
- @field_wrappers = @schema.each_with_object({}) do |field, h|
38
- h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
39
- end
40
-
41
- @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
36
+ @arrow_schema = ::Arrow::Schema.new(@schema)
42
37
  end
43
38
 
44
39
  def resume
@@ -48,9 +43,9 @@ module Fluent
48
43
  def generate_chunk(metadata)
49
44
  # FileChunk generates real path with unique_id
50
45
  if @file_permission
51
- chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
46
+ chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
52
47
  else
53
- chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
48
+ chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
54
49
  end
55
50
 
56
51
  log.debug "Created new chunk", chunk_id: dump_unique_id_hex(chunk.unique_id), metadata: metadata
@@ -16,7 +16,6 @@
16
16
  require "arrow"
17
17
  require 'fluent/plugin/buffer'
18
18
  require 'fluent/plugin/buffer/arrow_memory_chunk'
19
- require 'fluent/plugin/arrow/field_wrapper'
20
19
 
21
20
  module Fluent
22
21
  module Plugin
@@ -33,11 +32,7 @@ module Fluent
33
32
  super
34
33
 
35
34
  # [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
36
- @field_wrappers = @schema.each_with_object({}) do |field, h|
37
- h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
38
- end
39
-
40
- @arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
35
+ @arrow_schema = ::Arrow::Schema.new(@schema)
41
36
  end
42
37
 
43
38
  def resume
@@ -45,7 +40,7 @@ module Fluent
45
40
  end
46
41
 
47
42
  def generate_chunk(metadata)
48
- Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
43
+ Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
49
44
  end
50
45
  end
51
46
  end
@@ -10,32 +10,11 @@ module Fluent
10
10
  end
11
11
 
12
12
  def build_arrow_buffer_string
13
- count = 0
14
- each_record do |record|
15
- count += 1
16
- record.each do |k, v|
17
- @field_wrappers[k].append(v)
18
- end
19
- end
13
+ record_batch = ::Arrow::RecordBatch.new(@schema, each_record)
20
14
  arrow_buf = ::Arrow::ResizableBuffer.new(bytesize * 1.2)
21
-
22
- ::Arrow::BufferOutputStream.open(arrow_buf) do |output|
23
- if @format == :parquet
24
- Parquet::ArrowFileWriter.open(@schema, output) do |writer|
25
- columns = @schema.fields.map do |f|
26
- ::Arrow::Column.new(f, @field_wrappers[f.name].finish)
27
- end
28
- table = ::Arrow::Table.new(@schema, columns)
29
- writer.write_table(table, @chunk_size)
30
- end
31
- else
32
- ::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
33
- record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
34
- writer.write_record_batch(record_batch)
35
- end
36
- end
37
- end
38
-
15
+ record_batch.to_table.save(arrow_buf,
16
+ format: @format,
17
+ chunk_size: @chunk_size)
39
18
  arrow_buf.data.to_s
40
19
  end
41
20
  end
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
19
19
  require 'fluent/plugin/buffer/chunk'
20
20
  require 'fluent/plugin/buffer/file_chunk'
21
21
  require 'fluent/plugin/buffer/arrow_buffer_string_builder'
22
- require 'fluent/plugin/arrow/field_wrapper'
23
22
 
24
23
  module Fluent
25
24
  module Plugin
@@ -27,10 +26,9 @@ module Fluent
27
26
  class ArrowFileChunk < FileChunk
28
27
  include ArrowBufferStringBuilder
29
28
 
30
- def initialize(metadata, path, mode, schema, field_wrappers, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
29
+ def initialize(metadata, path, mode, schema, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
31
30
  super(metadata, path, mode, perm: perm, compress: :text)
32
31
  @schema = schema
33
- @field_wrappers = field_wrappers
34
32
  @chunk_size = chunk_size
35
33
  @format = format
36
34
  end
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
19
19
  require 'fluent/plugin/buffer/chunk'
20
20
  require 'fluent/plugin/buffer/memory_chunk'
21
21
  require 'fluent/plugin/buffer/arrow_buffer_string_builder'
22
- require 'fluent/plugin/arrow/field_wrapper'
23
22
 
24
23
  module Fluent
25
24
  module Plugin
@@ -27,10 +26,9 @@ module Fluent
27
26
  class ArrowMemoryChunk < MemoryChunk
28
27
  include ArrowBufferStringBuilder
29
28
 
30
- def initialize(metadata, schema, field_wrappers, chunk_size: 1024, format: :arrow)
29
+ def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
31
30
  super(metadata, compress: :text)
32
31
  @schema = schema
33
- @field_wrappers = field_wrappers
34
32
  @chunk_size = chunk_size
35
33
  @format = format
36
34
  end
@@ -25,8 +25,8 @@ class ArrowMemoryBufferTest < Test::Unit::TestCase
25
25
  {"name": "foo1", "type": "uint64"},
26
26
  {"name": "foo2", "type": "string"},
27
27
  {"name": "foo3", "type": "timestamp", "unit": "milli"},
28
- {"name": "foo4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
29
- {"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}]}
28
+ {"name": "foo4", "type": "list", "field": {"name": "value", "type": "uint64"}},
29
+ {"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}]}
30
30
  ]
31
31
  ]
32
32
  buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
@@ -9,13 +9,8 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
9
9
  ::Arrow::Field.new("key2", :double),
10
10
  ::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
11
11
  ]
12
- field_wrappers = {
13
- "key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
14
- "key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
15
- "key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
16
- }
17
12
  @schema = Arrow::Schema.new(@fields)
18
- @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
13
+ @c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
19
14
  end
20
15
 
21
16
  test "can #read" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - joker1007
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-17 00:00:00.000000000 Z
11
+ date: 2019-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -78,14 +78,14 @@ dependencies:
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '0.10'
81
+ version: '0.12'
82
82
  type: :runtime
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '0.10'
88
+ version: '0.12'
89
89
  - !ruby/object:Gem::Dependency
90
90
  name: red-parquet
91
91
  requirement: !ruby/object:Gem::Requirement
@@ -107,13 +107,15 @@ executables: []
107
107
  extensions: []
108
108
  extra_rdoc_files: []
109
109
  files:
110
+ - ".dockerignore"
110
111
  - ".gitignore"
112
+ - ".travis.yml"
113
+ - Dockerfile
111
114
  - Gemfile
112
115
  - LICENSE
113
116
  - README.md
114
117
  - Rakefile
115
118
  - fluent-plugin-arrow.gemspec
116
- - lib/fluent/plugin/arrow/field_wrapper.rb
117
119
  - lib/fluent/plugin/buf_arrow_file.rb
118
120
  - lib/fluent/plugin/buf_arrow_memory.rb
119
121
  - lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb
@@ -123,7 +125,6 @@ files:
123
125
  - test/helper.rb
124
126
  - test/plugin/test_buf_arrow_memory.rb
125
127
  - test/plugin/test_buffer_arrow_memory_chunk.rb
126
- - test/plugin/test_field_wrapper.rb
127
128
  homepage: https://github.com/joker1007/fluent-plugin-arrow
128
129
  licenses:
129
130
  - Apache-2.0
@@ -143,8 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
143
144
  - !ruby/object:Gem::Version
144
145
  version: '0'
145
146
  requirements: []
146
- rubyforge_project:
147
- rubygems_version: 2.7.8
147
+ rubygems_version: 3.0.1
148
148
  signing_key:
149
149
  specification_version: 4
150
150
  summary: Apache Arrow formatter plugin for fluentd.
@@ -152,4 +152,3 @@ test_files:
152
152
  - test/helper.rb
153
153
  - test/plugin/test_buf_arrow_memory.rb
154
154
  - test/plugin/test_buffer_arrow_memory_chunk.rb
155
- - test/plugin/test_field_wrapper.rb
@@ -1,259 +0,0 @@
1
- require "arrow"
2
-
3
- module Fluent
4
- module Plugin
5
- module Arrow
6
- class FieldWrapper
7
- class << self
8
- def build(field)
9
- case field["type"]
10
- when "string"
11
- StringFieldWrapper.new(field)
12
- when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
13
- IntegerFieldWrapper.new(field)
14
- when "float", "double"
15
- FloatFieldWrapper.new(field)
16
- when "boolean"
17
- BooleanFieldWrapper.new(field)
18
- when "date32"
19
- Date32FieldWrapper.new(field)
20
- when "date64"
21
- Date64FieldWrapper.new(field)
22
- when "timestamp"
23
- TimestampFieldWrapper.new(field)
24
- when "list"
25
- ListFieldWrapper.new(field)
26
- when "struct"
27
- StructFieldWrapper.new(field)
28
- else
29
- raise "Unsupported data type"
30
- end
31
- end
32
- end
33
-
34
- attr_reader :field, :name, :type, :children, :arrow_field, :array_builder
35
-
36
- def initialize(field)
37
- @field = field
38
- @name = field["name"]
39
- @type = field["type"]
40
- @children = []
41
-
42
- field["value_type"]&.tap do |f|
43
- @children << self.class.build(f)
44
- end
45
-
46
- field["fields"]&.each do |f|
47
- @children << self.class.build(f)
48
- end
49
-
50
- create_arrow_field
51
- create_array_builder
52
- end
53
-
54
- def append(value)
55
- if value.nil?
56
- @array_builder.append_null
57
- else
58
- @array_builder.append(cast_value(value))
59
- end
60
- end
61
-
62
- def finish
63
- @array_builder.finish
64
- end
65
-
66
- def create_arrow_field
67
- @arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
68
- end
69
-
70
- def create_arrow_data_type
71
- data_type_name = type.to_s.capitalize.gsub(/\AUint/, "UInt")
72
- data_type_class_name = "#{data_type_name}DataType"
73
- data_type_class = ::Arrow.const_get(data_type_class_name)
74
- data_type_class.new
75
- end
76
-
77
- def create_array_builder(from_parent = nil)
78
- if from_parent
79
- @array_builder = from_parent
80
- else
81
- data_type_str = arrow_field.data_type.to_s
82
- data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
83
- array_builder_class_name = "#{data_type_name}ArrayBuilder"
84
- array_builder_class = ::Arrow.const_get(array_builder_class_name)
85
- @array_builder = array_builder_class.new
86
- end
87
- end
88
-
89
- def cast_value(value)
90
- raise NotImplementedError
91
- end
92
- end
93
-
94
- class StringFieldWrapper < FieldWrapper
95
- def cast_value(value)
96
- value.to_s
97
- end
98
- end
99
-
100
- class IntegerFieldWrapper < FieldWrapper
101
- def cast_value(value)
102
- value.to_i
103
- end
104
- end
105
-
106
- class FloatFieldWrapper < FieldWrapper
107
- def cast_value(value)
108
- value.to_f
109
- end
110
- end
111
-
112
- class BooleanFieldWrapper < FieldWrapper
113
- def cast_value(value)
114
- !!value
115
- end
116
- end
117
-
118
- require "date"
119
- class Date32FieldWrapper < FieldWrapper
120
- UNIX_EPOCH = Date.new(1970, 1, 1)
121
- def cast_value(value)
122
- date =
123
- if value.respond_to?(:to_date)
124
- value.to_date
125
- else
126
- Date.parse(value)
127
- end
128
-
129
- (date - UNIX_EPOCH).to_i
130
- end
131
-
132
- def create_array_builder(from_parent = nil)
133
- if from_parent
134
- @array_builder = from_parent
135
- else
136
- @array_builder = ::Arrow::Date32ArrayBuilder.new
137
- end
138
- end
139
- end
140
-
141
- class Date64FieldWrapper < FieldWrapper
142
- UNIX_EPOCH = Date.new(1970, 1, 1)
143
- def cast_value(value)
144
- time =
145
- if value.respond_to?(:to_time)
146
- value.to_time
147
- else
148
- Time.parse(value)
149
- end
150
-
151
- time.to_i * 1_000 + time.usec / 1_000
152
- end
153
-
154
- def create_array_builder(from_parent = nil)
155
- if from_parent
156
- @array_builder = from_parent
157
- else
158
- @array_builder = ::Arrow::Date64ArrayBuilder.new
159
- end
160
- end
161
- end
162
-
163
- require "time"
164
- class TimestampFieldWrapper < FieldWrapper
165
- def cast_value(value)
166
- value =
167
- if value.is_a?(Fluent::EventTime)
168
- Time.at(value, value.usec)
169
- elsif value.respond_to?(:to_time)
170
- value.to_time
171
- elsif value.is_a?(String)
172
- Time.parse(value)
173
- else
174
- value
175
- end
176
-
177
- return value if value.is_a?(Numeric)
178
-
179
- case field["unit"]
180
- when "second"
181
- value.to_i
182
- when "milli"
183
- value.to_i * 1_000 + value.usec / 1_000
184
- when "micro"
185
- value.to_i * 1_000_000 + value.usec
186
- else
187
- value.to_i * 1_000_000_000 + value.nsec
188
- end
189
- end
190
-
191
- def create_arrow_data_type
192
- ::Arrow::TimestampDataType.new(field["unit"].to_sym)
193
- end
194
-
195
- def create_array_builder(from_parent = nil)
196
- if from_parent
197
- @array_builder = from_parent
198
- else
199
- @array_builder = ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
200
- end
201
- end
202
- end
203
-
204
- class ListFieldWrapper < FieldWrapper
205
- def append(value)
206
- if value.nil?
207
- @array_builder.append_null
208
- else
209
- @array_builder.append
210
- value.each do |v|
211
- @children[0].append(v)
212
- end
213
- end
214
- end
215
-
216
- def create_arrow_data_type
217
- ::Arrow::ListDataType.new(children[0].arrow_field)
218
- end
219
-
220
- def create_array_builder(from_parent = nil)
221
- if from_parent
222
- @array_builder = from_parent
223
- else
224
- @array_builder = ::Arrow::ListArrayBuilder.new(arrow_field.data_type)
225
- end
226
-
227
- @children.each { |c| c.create_array_builder(@array_builder.value_builder) }
228
- end
229
- end
230
-
231
- class StructFieldWrapper < FieldWrapper
232
- def append(value)
233
- if value.nil?
234
- @array_builder.append_null
235
- else
236
- @array_builder.append
237
- value.each do |k, v|
238
- @children.find { |c| c.name == k }.append(v)
239
- end
240
- end
241
- end
242
-
243
- def create_arrow_data_type
244
- ::Arrow::StructDataType.new(children.map(&:arrow_field))
245
- end
246
-
247
- def create_array_builder(from_parent = nil)
248
- if from_parent
249
- @array_builder = from_parent
250
- else
251
- @array_builder = ::Arrow::StructArrayBuilder.new(arrow_field.data_type)
252
- end
253
-
254
- @children.each_with_index { |c, i| c.create_array_builder(@array_builder.get_field_builder(i)) }
255
- end
256
- end
257
- end
258
- end
259
- end
@@ -1,145 +0,0 @@
1
- require "helper"
2
- require "fluent/plugin/arrow/field_wrapper"
3
-
4
- class ArrowFieldWrapperTest < Test::Unit::TestCase
5
- test ".build (string)" do
6
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "string"})
7
- assert_equal "key1", field_wrapper.name
8
- assert_equal "string", field_wrapper.type
9
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
10
- end
11
-
12
- test ".build (timestamp)" do
13
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
14
- assert_equal "key1", field_wrapper.name
15
- assert_equal "timestamp", field_wrapper.type
16
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
17
- end
18
-
19
- test ".build (list)" do
20
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})
21
- assert_equal "key1", field_wrapper.name
22
- assert_equal "list", field_wrapper.type
23
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
24
- assert_kind_of Arrow::ListDataType, field_wrapper.arrow_field.data_type
25
- assert_kind_of Arrow::ListArrayBuilder, field_wrapper.array_builder
26
-
27
- assert_equal "value", field_wrapper.children[0].name
28
- assert_equal "string", field_wrapper.children[0].type
29
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
30
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
31
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
32
- end
33
-
34
- test ".build (struct)" do
35
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
36
- {"name" => "foo1", "type" => "string"},
37
- {"name" => "foo2", "type" => "uint64"},
38
- {"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
39
- ]})
40
- assert_equal "key1", field_wrapper.name
41
- assert_equal "struct", field_wrapper.type
42
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
43
- assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
44
- assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
45
-
46
- assert_equal "foo1", field_wrapper.children[0].name
47
- assert_equal "string", field_wrapper.children[0].type
48
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
49
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
50
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
51
-
52
- assert_equal "foo2", field_wrapper.children[1].name
53
- assert_equal "uint64", field_wrapper.children[1].type
54
- assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
55
- assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].arrow_field.data_type
56
- assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].array_builder
57
-
58
- assert_equal "foo3", field_wrapper.children[2].name
59
- assert_equal "timestamp", field_wrapper.children[2].type
60
- assert_kind_of Arrow::Field, field_wrapper.children[2].arrow_field
61
- assert_kind_of Arrow::TimestampDataType, field_wrapper.children[2].arrow_field.data_type
62
- assert_kind_of Arrow::TimestampArrayBuilder, field_wrapper.children[2].array_builder
63
- end
64
-
65
- test ".build (nested)" do
66
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
67
- {"name" => "foo1", "type" => "string"},
68
- {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
69
- ]})
70
- assert_equal "key1", field_wrapper.name
71
- assert_equal "struct", field_wrapper.type
72
- assert_kind_of Arrow::Field, field_wrapper.arrow_field
73
- assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
74
- assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
75
-
76
- assert_equal "foo1", field_wrapper.children[0].name
77
- assert_equal "string", field_wrapper.children[0].type
78
- assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
79
- assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
80
- assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
81
-
82
- assert_equal "foo2", field_wrapper.children[1].name
83
- assert_equal "list", field_wrapper.children[1].type
84
- assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
85
- assert_kind_of Arrow::ListDataType, field_wrapper.children[1].arrow_field.data_type
86
- assert_kind_of Arrow::ListArrayBuilder, field_wrapper.children[1].array_builder
87
-
88
- assert_equal "value", field_wrapper.children[1].children[0].name
89
- assert_equal "uint64", field_wrapper.children[1].children[0].type
90
- assert_kind_of Arrow::Field, field_wrapper.children[1].children[0].arrow_field
91
- assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].children[0].arrow_field.data_type
92
- assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].children[0].array_builder
93
- end
94
-
95
- test "#append (timestamp)" do
96
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
97
- time = Time.now
98
- field_wrapper.append(time)
99
- timestamp_array = field_wrapper.finish
100
- assert_kind_of Time, timestamp_array[0]
101
- assert_equal time.to_i, timestamp_array[0].to_i
102
- end
103
-
104
- test "#append (date32)" do
105
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date32"})
106
- date = Date.today
107
- field_wrapper.append(date)
108
- date_array = field_wrapper.finish
109
- assert_kind_of Date, date_array[0]
110
- assert_equal date, date_array[0]
111
- end
112
-
113
- test "#append (date64)" do
114
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date64"})
115
- date = Date.today
116
- field_wrapper.append(date)
117
- date_array = field_wrapper.finish
118
- assert_kind_of DateTime, date_array[0]
119
- assert_equal date, date_array[0].to_date
120
- end
121
-
122
- test "#append (nested)" do
123
- field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
124
- {"name" => "foo1", "type" => "string"},
125
- {"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
126
- ]})
127
-
128
- field_wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
129
- field_wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})
130
-
131
- struct_array = field_wrapper.finish
132
- assert_kind_of Arrow::StringArray, struct_array.fields[0]
133
- assert_equal "rec1", struct_array.fields[0][0]
134
- assert_equal "rec2", struct_array.fields[0][1]
135
-
136
- assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(0)
137
- assert_equal 1, struct_array.fields[1].get_value(0)[0]
138
- assert_equal 2, struct_array.fields[1].get_value(0)[1]
139
- assert_equal 3, struct_array.fields[1].get_value(0)[2]
140
-
141
- assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(1)
142
- assert_equal 4, struct_array.fields[1].get_value(1)[0]
143
- assert_equal 5, struct_array.fields[1].get_value(1)[1]
144
- end
145
- end