fluent-plugin-arrow 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.dockerignore +2 -0
- data/.travis.yml +10 -0
- data/Dockerfile +14 -0
- data/README.md +2 -2
- data/fluent-plugin-arrow.gemspec +2 -2
- data/lib/fluent/plugin/buf_arrow_file.rb +3 -8
- data/lib/fluent/plugin/buf_arrow_memory.rb +2 -7
- data/lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb +4 -25
- data/lib/fluent/plugin/buffer/arrow_file_chunk.rb +1 -3
- data/lib/fluent/plugin/buffer/arrow_memory_chunk.rb +1 -3
- data/test/plugin/test_buf_arrow_memory.rb +2 -2
- data/test/plugin/test_buffer_arrow_memory_chunk.rb +1 -6
- metadata +8 -9
- data/lib/fluent/plugin/arrow/field_wrapper.rb +0 -259
- data/test/plugin/test_field_wrapper.rb +0 -145
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 835c64e5e2e22f41142ec79b7ba525e7d47ca2fec9a70bec9daa579af86a6f2b
|
4
|
+
data.tar.gz: 8df48380e8a2f3eab3762ff0b91f88cba787cd0ac46cb1b9f02e69dfe99926eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fff346a50875d7b554e067bf9b50dc64764059101b30cc9bdbadc515e45ab75841a35170f6339bea1b20d3e8365b8a7b617039757958e9bbf4f90b0c1e28348
|
7
|
+
data.tar.gz: bc034add73f3f31df0a6c1b6c24cea15eef0821f3be1062dbd940104cce178d105065b24b332eddc661faeaba9905bbc68f830c54647b10c68000f1bca4361a3
|
data/.dockerignore
ADDED
data/.travis.yml
ADDED
data/Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
FROM ruby:2.6-stretch
|
2
|
+
|
3
|
+
RUN \
|
4
|
+
apt update && \
|
5
|
+
apt install -y apt-transport-https lsb-release && \
|
6
|
+
wget -O /usr/share/keyrings/apache-arrow-keyring.gpg \
|
7
|
+
https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-keyring.gpg && \
|
8
|
+
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/apache-arrow-keyring.gpg] https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/ $(lsb_release --codename --short) main" > \
|
9
|
+
/etc/apt/sources.list.d/apache-arrow.list
|
10
|
+
|
11
|
+
RUN mkdir /app
|
12
|
+
WORKDIR /app
|
13
|
+
COPY . /app
|
14
|
+
RUN apt update && bundle install
|
data/README.md
CHANGED
@@ -49,10 +49,10 @@ You can generate configuration template:
|
|
49
49
|
{"name": "key1", "type": "string"},
|
50
50
|
{"name": "key2", "type": "uint64"},
|
51
51
|
{"name": "key3", "type": "timestamp", "unit": "milli"},
|
52
|
-
{"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
|
52
|
+
{"name": "key4", "type": "list", "field": {"name": "value", "type": "uint64"}},
|
53
53
|
{"name": "key5", "type": "struct", "fields": [
|
54
54
|
{"name": "bar1", "type": "uint64"},
|
55
|
-
{"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
|
55
|
+
{"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}
|
56
56
|
]}
|
57
57
|
]
|
58
58
|
</buffer>
|
data/fluent-plugin-arrow.gemspec
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = "fluent-plugin-arrow"
|
6
|
-
spec.version = "0.0.3"
|
6
|
+
spec.version = "0.0.4"
|
7
7
|
spec.authors = ["joker1007"]
|
8
8
|
spec.email = ["kakyoin.hierophant@gmail.com"]
|
9
9
|
|
@@ -24,6 +24,6 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "rake", "~> 12.0"
|
25
25
|
spec.add_development_dependency "test-unit", "~> 3.0"
|
26
26
|
spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
|
27
|
-
spec.add_runtime_dependency "red-arrow", ">= 0.
|
27
|
+
spec.add_runtime_dependency "red-arrow", ">= 0.12"
|
28
28
|
spec.add_runtime_dependency "red-parquet"
|
29
29
|
end
|
@@ -17,7 +17,6 @@
|
|
17
17
|
require "arrow"
|
18
18
|
require 'fluent/plugin/buf_file'
|
19
19
|
require 'fluent/plugin/buffer/arrow_file_chunk'
|
20
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
21
20
|
|
22
21
|
module Fluent
|
23
22
|
module Plugin
|
@@ -34,11 +33,7 @@ module Fluent
|
|
34
33
|
super
|
35
34
|
|
36
35
|
# [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
|
37
|
-
@
|
38
|
-
h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
|
39
|
-
end
|
40
|
-
|
41
|
-
@arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
|
36
|
+
@arrow_schema = ::Arrow::Schema.new(@schema)
|
42
37
|
end
|
43
38
|
|
44
39
|
def resume
|
@@ -48,9 +43,9 @@ module Fluent
|
|
48
43
|
def generate_chunk(metadata)
|
49
44
|
# FileChunk generates real path with unique_id
|
50
45
|
if @file_permission
|
51
|
-
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema,
|
46
|
+
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
52
47
|
else
|
53
|
-
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema,
|
48
|
+
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
54
49
|
end
|
55
50
|
|
56
51
|
log.debug "Created new chunk", chunk_id: dump_unique_id_hex(chunk.unique_id), metadata: metadata
|
@@ -16,7 +16,6 @@
|
|
16
16
|
require "arrow"
|
17
17
|
require 'fluent/plugin/buffer'
|
18
18
|
require 'fluent/plugin/buffer/arrow_memory_chunk'
|
19
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
20
19
|
|
21
20
|
module Fluent
|
22
21
|
module Plugin
|
@@ -33,11 +32,7 @@ module Fluent
|
|
33
32
|
super
|
34
33
|
|
35
34
|
# [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
|
36
|
-
@
|
37
|
-
h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
|
38
|
-
end
|
39
|
-
|
40
|
-
@arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
|
35
|
+
@arrow_schema = ::Arrow::Schema.new(@schema)
|
41
36
|
end
|
42
37
|
|
43
38
|
def resume
|
@@ -45,7 +40,7 @@ module Fluent
|
|
45
40
|
end
|
46
41
|
|
47
42
|
def generate_chunk(metadata)
|
48
|
-
Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema,
|
43
|
+
Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
49
44
|
end
|
50
45
|
end
|
51
46
|
end
|
@@ -10,32 +10,11 @@ module Fluent
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def build_arrow_buffer_string
|
13
|
-
|
14
|
-
each_record do |record|
|
15
|
-
count += 1
|
16
|
-
record.each do |k, v|
|
17
|
-
@field_wrappers[k].append(v)
|
18
|
-
end
|
19
|
-
end
|
13
|
+
record_batch = ::Arrow::RecordBatch.new(@schema, each_record)
|
20
14
|
arrow_buf = ::Arrow::ResizableBuffer.new(bytesize * 1.2)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
Parquet::ArrowFileWriter.open(@schema, output) do |writer|
|
25
|
-
columns = @schema.fields.map do |f|
|
26
|
-
::Arrow::Column.new(f, @field_wrappers[f.name].finish)
|
27
|
-
end
|
28
|
-
table = ::Arrow::Table.new(@schema, columns)
|
29
|
-
writer.write_table(table, @chunk_size)
|
30
|
-
end
|
31
|
-
else
|
32
|
-
::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
|
33
|
-
record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
|
34
|
-
writer.write_record_batch(record_batch)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
15
|
+
record_batch.to_table.save(arrow_buf,
|
16
|
+
format: @format,
|
17
|
+
chunk_size: @chunk_size)
|
39
18
|
arrow_buf.data.to_s
|
40
19
|
end
|
41
20
|
end
|
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
|
|
19
19
|
require 'fluent/plugin/buffer/chunk'
|
20
20
|
require 'fluent/plugin/buffer/file_chunk'
|
21
21
|
require 'fluent/plugin/buffer/arrow_buffer_string_builder'
|
22
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
23
22
|
|
24
23
|
module Fluent
|
25
24
|
module Plugin
|
@@ -27,10 +26,9 @@ module Fluent
|
|
27
26
|
class ArrowFileChunk < FileChunk
|
28
27
|
include ArrowBufferStringBuilder
|
29
28
|
|
30
|
-
def initialize(metadata, path, mode, schema,
|
29
|
+
def initialize(metadata, path, mode, schema, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
|
31
30
|
super(metadata, path, mode, perm: perm, compress: :text)
|
32
31
|
@schema = schema
|
33
|
-
@field_wrappers = field_wrappers
|
34
32
|
@chunk_size = chunk_size
|
35
33
|
@format = format
|
36
34
|
end
|
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
|
|
19
19
|
require 'fluent/plugin/buffer/chunk'
|
20
20
|
require 'fluent/plugin/buffer/memory_chunk'
|
21
21
|
require 'fluent/plugin/buffer/arrow_buffer_string_builder'
|
22
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
23
22
|
|
24
23
|
module Fluent
|
25
24
|
module Plugin
|
@@ -27,10 +26,9 @@ module Fluent
|
|
27
26
|
class ArrowMemoryChunk < MemoryChunk
|
28
27
|
include ArrowBufferStringBuilder
|
29
28
|
|
30
|
-
def initialize(metadata, schema,
|
29
|
+
def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
|
31
30
|
super(metadata, compress: :text)
|
32
31
|
@schema = schema
|
33
|
-
@field_wrappers = field_wrappers
|
34
32
|
@chunk_size = chunk_size
|
35
33
|
@format = format
|
36
34
|
end
|
@@ -25,8 +25,8 @@ class ArrowMemoryBufferTest < Test::Unit::TestCase
|
|
25
25
|
{"name": "foo1", "type": "uint64"},
|
26
26
|
{"name": "foo2", "type": "string"},
|
27
27
|
{"name": "foo3", "type": "timestamp", "unit": "milli"},
|
28
|
-
{"name": "foo4", "type": "list", "
|
29
|
-
{"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "
|
28
|
+
{"name": "foo4", "type": "list", "field": {"name": "value", "type": "uint64"}},
|
29
|
+
{"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}]}
|
30
30
|
]
|
31
31
|
]
|
32
32
|
buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
|
@@ -9,13 +9,8 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
|
|
9
9
|
::Arrow::Field.new("key2", :double),
|
10
10
|
::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
|
11
11
|
]
|
12
|
-
field_wrappers = {
|
13
|
-
"key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
|
14
|
-
"key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
|
15
|
-
"key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
|
16
|
-
}
|
17
12
|
@schema = Arrow::Schema.new(@fields)
|
18
|
-
@c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
|
13
|
+
@c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
|
19
14
|
end
|
20
15
|
|
21
16
|
test "can #read" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -78,14 +78,14 @@ dependencies:
|
|
78
78
|
requirements:
|
79
79
|
- - ">="
|
80
80
|
- !ruby/object:Gem::Version
|
81
|
-
version: '0.
|
81
|
+
version: '0.12'
|
82
82
|
type: :runtime
|
83
83
|
prerelease: false
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
85
85
|
requirements:
|
86
86
|
- - ">="
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: '0.
|
88
|
+
version: '0.12'
|
89
89
|
- !ruby/object:Gem::Dependency
|
90
90
|
name: red-parquet
|
91
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -107,13 +107,15 @@ executables: []
|
|
107
107
|
extensions: []
|
108
108
|
extra_rdoc_files: []
|
109
109
|
files:
|
110
|
+
- ".dockerignore"
|
110
111
|
- ".gitignore"
|
112
|
+
- ".travis.yml"
|
113
|
+
- Dockerfile
|
111
114
|
- Gemfile
|
112
115
|
- LICENSE
|
113
116
|
- README.md
|
114
117
|
- Rakefile
|
115
118
|
- fluent-plugin-arrow.gemspec
|
116
|
-
- lib/fluent/plugin/arrow/field_wrapper.rb
|
117
119
|
- lib/fluent/plugin/buf_arrow_file.rb
|
118
120
|
- lib/fluent/plugin/buf_arrow_memory.rb
|
119
121
|
- lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb
|
@@ -123,7 +125,6 @@ files:
|
|
123
125
|
- test/helper.rb
|
124
126
|
- test/plugin/test_buf_arrow_memory.rb
|
125
127
|
- test/plugin/test_buffer_arrow_memory_chunk.rb
|
126
|
-
- test/plugin/test_field_wrapper.rb
|
127
128
|
homepage: https://github.com/joker1007/fluent-plugin-arrow
|
128
129
|
licenses:
|
129
130
|
- Apache-2.0
|
@@ -143,8 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
144
|
- !ruby/object:Gem::Version
|
144
145
|
version: '0'
|
145
146
|
requirements: []
|
146
|
-
|
147
|
-
rubygems_version: 2.7.8
|
147
|
+
rubygems_version: 3.0.1
|
148
148
|
signing_key:
|
149
149
|
specification_version: 4
|
150
150
|
summary: Apache Arrow formatter plugin for fluentd.
|
@@ -152,4 +152,3 @@ test_files:
|
|
152
152
|
- test/helper.rb
|
153
153
|
- test/plugin/test_buf_arrow_memory.rb
|
154
154
|
- test/plugin/test_buffer_arrow_memory_chunk.rb
|
155
|
-
- test/plugin/test_field_wrapper.rb
|
@@ -1,259 +0,0 @@
|
|
1
|
-
require "arrow"
|
2
|
-
|
3
|
-
module Fluent
|
4
|
-
module Plugin
|
5
|
-
module Arrow
|
6
|
-
class FieldWrapper
|
7
|
-
class << self
|
8
|
-
def build(field)
|
9
|
-
case field["type"]
|
10
|
-
when "string"
|
11
|
-
StringFieldWrapper.new(field)
|
12
|
-
when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
|
13
|
-
IntegerFieldWrapper.new(field)
|
14
|
-
when "float", "double"
|
15
|
-
FloatFieldWrapper.new(field)
|
16
|
-
when "boolean"
|
17
|
-
BooleanFieldWrapper.new(field)
|
18
|
-
when "date32"
|
19
|
-
Date32FieldWrapper.new(field)
|
20
|
-
when "date64"
|
21
|
-
Date64FieldWrapper.new(field)
|
22
|
-
when "timestamp"
|
23
|
-
TimestampFieldWrapper.new(field)
|
24
|
-
when "list"
|
25
|
-
ListFieldWrapper.new(field)
|
26
|
-
when "struct"
|
27
|
-
StructFieldWrapper.new(field)
|
28
|
-
else
|
29
|
-
raise "Unsupported data type"
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
attr_reader :field, :name, :type, :children, :arrow_field, :array_builder
|
35
|
-
|
36
|
-
def initialize(field)
|
37
|
-
@field = field
|
38
|
-
@name = field["name"]
|
39
|
-
@type = field["type"]
|
40
|
-
@children = []
|
41
|
-
|
42
|
-
field["value_type"]&.tap do |f|
|
43
|
-
@children << self.class.build(f)
|
44
|
-
end
|
45
|
-
|
46
|
-
field["fields"]&.each do |f|
|
47
|
-
@children << self.class.build(f)
|
48
|
-
end
|
49
|
-
|
50
|
-
create_arrow_field
|
51
|
-
create_array_builder
|
52
|
-
end
|
53
|
-
|
54
|
-
def append(value)
|
55
|
-
if value.nil?
|
56
|
-
@array_builder.append_null
|
57
|
-
else
|
58
|
-
@array_builder.append(cast_value(value))
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def finish
|
63
|
-
@array_builder.finish
|
64
|
-
end
|
65
|
-
|
66
|
-
def create_arrow_field
|
67
|
-
@arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
|
68
|
-
end
|
69
|
-
|
70
|
-
def create_arrow_data_type
|
71
|
-
data_type_name = type.to_s.capitalize.gsub(/\AUint/, "UInt")
|
72
|
-
data_type_class_name = "#{data_type_name}DataType"
|
73
|
-
data_type_class = ::Arrow.const_get(data_type_class_name)
|
74
|
-
data_type_class.new
|
75
|
-
end
|
76
|
-
|
77
|
-
def create_array_builder(from_parent = nil)
|
78
|
-
if from_parent
|
79
|
-
@array_builder = from_parent
|
80
|
-
else
|
81
|
-
data_type_str = arrow_field.data_type.to_s
|
82
|
-
data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
|
83
|
-
array_builder_class_name = "#{data_type_name}ArrayBuilder"
|
84
|
-
array_builder_class = ::Arrow.const_get(array_builder_class_name)
|
85
|
-
@array_builder = array_builder_class.new
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def cast_value(value)
|
90
|
-
raise NotImplementedError
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class StringFieldWrapper < FieldWrapper
|
95
|
-
def cast_value(value)
|
96
|
-
value.to_s
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
class IntegerFieldWrapper < FieldWrapper
|
101
|
-
def cast_value(value)
|
102
|
-
value.to_i
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
class FloatFieldWrapper < FieldWrapper
|
107
|
-
def cast_value(value)
|
108
|
-
value.to_f
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
class BooleanFieldWrapper < FieldWrapper
|
113
|
-
def cast_value(value)
|
114
|
-
!!value
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
require "date"
|
119
|
-
class Date32FieldWrapper < FieldWrapper
|
120
|
-
UNIX_EPOCH = Date.new(1970, 1, 1)
|
121
|
-
def cast_value(value)
|
122
|
-
date =
|
123
|
-
if value.respond_to?(:to_date)
|
124
|
-
value.to_date
|
125
|
-
else
|
126
|
-
Date.parse(value)
|
127
|
-
end
|
128
|
-
|
129
|
-
(date - UNIX_EPOCH).to_i
|
130
|
-
end
|
131
|
-
|
132
|
-
def create_array_builder(from_parent = nil)
|
133
|
-
if from_parent
|
134
|
-
@array_builder = from_parent
|
135
|
-
else
|
136
|
-
@array_builder = ::Arrow::Date32ArrayBuilder.new
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
class Date64FieldWrapper < FieldWrapper
|
142
|
-
UNIX_EPOCH = Date.new(1970, 1, 1)
|
143
|
-
def cast_value(value)
|
144
|
-
time =
|
145
|
-
if value.respond_to?(:to_time)
|
146
|
-
value.to_time
|
147
|
-
else
|
148
|
-
Time.parse(value)
|
149
|
-
end
|
150
|
-
|
151
|
-
time.to_i * 1_000 + time.usec / 1_000
|
152
|
-
end
|
153
|
-
|
154
|
-
def create_array_builder(from_parent = nil)
|
155
|
-
if from_parent
|
156
|
-
@array_builder = from_parent
|
157
|
-
else
|
158
|
-
@array_builder = ::Arrow::Date64ArrayBuilder.new
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
require "time"
|
164
|
-
class TimestampFieldWrapper < FieldWrapper
|
165
|
-
def cast_value(value)
|
166
|
-
value =
|
167
|
-
if value.is_a?(Fluent::EventTime)
|
168
|
-
Time.at(value, value.usec)
|
169
|
-
elsif value.respond_to?(:to_time)
|
170
|
-
value.to_time
|
171
|
-
elsif value.is_a?(String)
|
172
|
-
Time.parse(value)
|
173
|
-
else
|
174
|
-
value
|
175
|
-
end
|
176
|
-
|
177
|
-
return value if value.is_a?(Numeric)
|
178
|
-
|
179
|
-
case field["unit"]
|
180
|
-
when "second"
|
181
|
-
value.to_i
|
182
|
-
when "milli"
|
183
|
-
value.to_i * 1_000 + value.usec / 1_000
|
184
|
-
when "micro"
|
185
|
-
value.to_i * 1_000_000 + value.usec
|
186
|
-
else
|
187
|
-
value.to_i * 1_000_000_000 + value.nsec
|
188
|
-
end
|
189
|
-
end
|
190
|
-
|
191
|
-
def create_arrow_data_type
|
192
|
-
::Arrow::TimestampDataType.new(field["unit"].to_sym)
|
193
|
-
end
|
194
|
-
|
195
|
-
def create_array_builder(from_parent = nil)
|
196
|
-
if from_parent
|
197
|
-
@array_builder = from_parent
|
198
|
-
else
|
199
|
-
@array_builder = ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
|
200
|
-
end
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
class ListFieldWrapper < FieldWrapper
|
205
|
-
def append(value)
|
206
|
-
if value.nil?
|
207
|
-
@array_builder.append_null
|
208
|
-
else
|
209
|
-
@array_builder.append
|
210
|
-
value.each do |v|
|
211
|
-
@children[0].append(v)
|
212
|
-
end
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
def create_arrow_data_type
|
217
|
-
::Arrow::ListDataType.new(children[0].arrow_field)
|
218
|
-
end
|
219
|
-
|
220
|
-
def create_array_builder(from_parent = nil)
|
221
|
-
if from_parent
|
222
|
-
@array_builder = from_parent
|
223
|
-
else
|
224
|
-
@array_builder = ::Arrow::ListArrayBuilder.new(arrow_field.data_type)
|
225
|
-
end
|
226
|
-
|
227
|
-
@children.each { |c| c.create_array_builder(@array_builder.value_builder) }
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
class StructFieldWrapper < FieldWrapper
|
232
|
-
def append(value)
|
233
|
-
if value.nil?
|
234
|
-
@array_builder.append_null
|
235
|
-
else
|
236
|
-
@array_builder.append
|
237
|
-
value.each do |k, v|
|
238
|
-
@children.find { |c| c.name == k }.append(v)
|
239
|
-
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
def create_arrow_data_type
|
244
|
-
::Arrow::StructDataType.new(children.map(&:arrow_field))
|
245
|
-
end
|
246
|
-
|
247
|
-
def create_array_builder(from_parent = nil)
|
248
|
-
if from_parent
|
249
|
-
@array_builder = from_parent
|
250
|
-
else
|
251
|
-
@array_builder = ::Arrow::StructArrayBuilder.new(arrow_field.data_type)
|
252
|
-
end
|
253
|
-
|
254
|
-
@children.each_with_index { |c, i| c.create_array_builder(@array_builder.get_field_builder(i)) }
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
@@ -1,145 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
require "fluent/plugin/arrow/field_wrapper"
|
3
|
-
|
4
|
-
class ArrowFieldWrapperTest < Test::Unit::TestCase
|
5
|
-
test ".build (string)" do
|
6
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "string"})
|
7
|
-
assert_equal "key1", field_wrapper.name
|
8
|
-
assert_equal "string", field_wrapper.type
|
9
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
10
|
-
end
|
11
|
-
|
12
|
-
test ".build (timestamp)" do
|
13
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
|
14
|
-
assert_equal "key1", field_wrapper.name
|
15
|
-
assert_equal "timestamp", field_wrapper.type
|
16
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
17
|
-
end
|
18
|
-
|
19
|
-
test ".build (list)" do
|
20
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})
|
21
|
-
assert_equal "key1", field_wrapper.name
|
22
|
-
assert_equal "list", field_wrapper.type
|
23
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
24
|
-
assert_kind_of Arrow::ListDataType, field_wrapper.arrow_field.data_type
|
25
|
-
assert_kind_of Arrow::ListArrayBuilder, field_wrapper.array_builder
|
26
|
-
|
27
|
-
assert_equal "value", field_wrapper.children[0].name
|
28
|
-
assert_equal "string", field_wrapper.children[0].type
|
29
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
30
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
31
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
32
|
-
end
|
33
|
-
|
34
|
-
test ".build (struct)" do
|
35
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
36
|
-
{"name" => "foo1", "type" => "string"},
|
37
|
-
{"name" => "foo2", "type" => "uint64"},
|
38
|
-
{"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
|
39
|
-
]})
|
40
|
-
assert_equal "key1", field_wrapper.name
|
41
|
-
assert_equal "struct", field_wrapper.type
|
42
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
43
|
-
assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
|
44
|
-
assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
|
45
|
-
|
46
|
-
assert_equal "foo1", field_wrapper.children[0].name
|
47
|
-
assert_equal "string", field_wrapper.children[0].type
|
48
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
49
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
50
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
51
|
-
|
52
|
-
assert_equal "foo2", field_wrapper.children[1].name
|
53
|
-
assert_equal "uint64", field_wrapper.children[1].type
|
54
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
|
55
|
-
assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].arrow_field.data_type
|
56
|
-
assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].array_builder
|
57
|
-
|
58
|
-
assert_equal "foo3", field_wrapper.children[2].name
|
59
|
-
assert_equal "timestamp", field_wrapper.children[2].type
|
60
|
-
assert_kind_of Arrow::Field, field_wrapper.children[2].arrow_field
|
61
|
-
assert_kind_of Arrow::TimestampDataType, field_wrapper.children[2].arrow_field.data_type
|
62
|
-
assert_kind_of Arrow::TimestampArrayBuilder, field_wrapper.children[2].array_builder
|
63
|
-
end
|
64
|
-
|
65
|
-
test ".build (nested)" do
|
66
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
67
|
-
{"name" => "foo1", "type" => "string"},
|
68
|
-
{"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
|
69
|
-
]})
|
70
|
-
assert_equal "key1", field_wrapper.name
|
71
|
-
assert_equal "struct", field_wrapper.type
|
72
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
73
|
-
assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
|
74
|
-
assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
|
75
|
-
|
76
|
-
assert_equal "foo1", field_wrapper.children[0].name
|
77
|
-
assert_equal "string", field_wrapper.children[0].type
|
78
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
79
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
80
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
81
|
-
|
82
|
-
assert_equal "foo2", field_wrapper.children[1].name
|
83
|
-
assert_equal "list", field_wrapper.children[1].type
|
84
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
|
85
|
-
assert_kind_of Arrow::ListDataType, field_wrapper.children[1].arrow_field.data_type
|
86
|
-
assert_kind_of Arrow::ListArrayBuilder, field_wrapper.children[1].array_builder
|
87
|
-
|
88
|
-
assert_equal "value", field_wrapper.children[1].children[0].name
|
89
|
-
assert_equal "uint64", field_wrapper.children[1].children[0].type
|
90
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].children[0].arrow_field
|
91
|
-
assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].children[0].arrow_field.data_type
|
92
|
-
assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].children[0].array_builder
|
93
|
-
end
|
94
|
-
|
95
|
-
test "#append (timestamp)" do
|
96
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
|
97
|
-
time = Time.now
|
98
|
-
field_wrapper.append(time)
|
99
|
-
timestamp_array = field_wrapper.finish
|
100
|
-
assert_kind_of Time, timestamp_array[0]
|
101
|
-
assert_equal time.to_i, timestamp_array[0].to_i
|
102
|
-
end
|
103
|
-
|
104
|
-
test "#append (date32)" do
|
105
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date32"})
|
106
|
-
date = Date.today
|
107
|
-
field_wrapper.append(date)
|
108
|
-
date_array = field_wrapper.finish
|
109
|
-
assert_kind_of Date, date_array[0]
|
110
|
-
assert_equal date, date_array[0]
|
111
|
-
end
|
112
|
-
|
113
|
-
test "#append (date64)" do
|
114
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date64"})
|
115
|
-
date = Date.today
|
116
|
-
field_wrapper.append(date)
|
117
|
-
date_array = field_wrapper.finish
|
118
|
-
assert_kind_of DateTime, date_array[0]
|
119
|
-
assert_equal date, date_array[0].to_date
|
120
|
-
end
|
121
|
-
|
122
|
-
test "#append (nested)" do
|
123
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
124
|
-
{"name" => "foo1", "type" => "string"},
|
125
|
-
{"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
|
126
|
-
]})
|
127
|
-
|
128
|
-
field_wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
|
129
|
-
field_wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})
|
130
|
-
|
131
|
-
struct_array = field_wrapper.finish
|
132
|
-
assert_kind_of Arrow::StringArray, struct_array.fields[0]
|
133
|
-
assert_equal "rec1", struct_array.fields[0][0]
|
134
|
-
assert_equal "rec2", struct_array.fields[0][1]
|
135
|
-
|
136
|
-
assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(0)
|
137
|
-
assert_equal 1, struct_array.fields[1].get_value(0)[0]
|
138
|
-
assert_equal 2, struct_array.fields[1].get_value(0)[1]
|
139
|
-
assert_equal 3, struct_array.fields[1].get_value(0)[2]
|
140
|
-
|
141
|
-
assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(1)
|
142
|
-
assert_equal 4, struct_array.fields[1].get_value(1)[0]
|
143
|
-
assert_equal 5, struct_array.fields[1].get_value(1)[1]
|
144
|
-
end
|
145
|
-
end
|