fluent-plugin-arrow 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.dockerignore +2 -0
- data/.travis.yml +10 -0
- data/Dockerfile +14 -0
- data/README.md +2 -2
- data/fluent-plugin-arrow.gemspec +2 -2
- data/lib/fluent/plugin/buf_arrow_file.rb +3 -8
- data/lib/fluent/plugin/buf_arrow_memory.rb +2 -7
- data/lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb +4 -25
- data/lib/fluent/plugin/buffer/arrow_file_chunk.rb +1 -3
- data/lib/fluent/plugin/buffer/arrow_memory_chunk.rb +1 -3
- data/test/plugin/test_buf_arrow_memory.rb +2 -2
- data/test/plugin/test_buffer_arrow_memory_chunk.rb +1 -6
- metadata +8 -9
- data/lib/fluent/plugin/arrow/field_wrapper.rb +0 -259
- data/test/plugin/test_field_wrapper.rb +0 -145
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 835c64e5e2e22f41142ec79b7ba525e7d47ca2fec9a70bec9daa579af86a6f2b
|
4
|
+
data.tar.gz: 8df48380e8a2f3eab3762ff0b91f88cba787cd0ac46cb1b9f02e69dfe99926eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fff346a50875d7b554e067bf9b50dc64764059101b30cc9bdbadc515e45ab75841a35170f6339bea1b20d3e8365b8a7b617039757958e9bbf4f90b0c1e28348
|
7
|
+
data.tar.gz: bc034add73f3f31df0a6c1b6c24cea15eef0821f3be1062dbd940104cce178d105065b24b332eddc661faeaba9905bbc68f830c54647b10c68000f1bca4361a3
|
data/.dockerignore
ADDED
data/.travis.yml
ADDED
data/Dockerfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
FROM ruby:2.6-stretch
|
2
|
+
|
3
|
+
RUN \
|
4
|
+
apt update && \
|
5
|
+
apt install -y apt-transport-https lsb-release && \
|
6
|
+
wget -O /usr/share/keyrings/apache-arrow-keyring.gpg \
|
7
|
+
https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-keyring.gpg && \
|
8
|
+
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/apache-arrow-keyring.gpg] https://dl.bintray.com/apache/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/ $(lsb_release --codename --short) main" > \
|
9
|
+
/etc/apt/sources.list.d/apache-arrow.list
|
10
|
+
|
11
|
+
RUN mkdir /app
|
12
|
+
WORKDIR /app
|
13
|
+
COPY . /app
|
14
|
+
RUN apt update && bundle install
|
data/README.md
CHANGED
@@ -49,10 +49,10 @@ You can generate configuration template:
|
|
49
49
|
{"name": "key1", "type": "string"},
|
50
50
|
{"name": "key2", "type": "uint64"},
|
51
51
|
{"name": "key3", "type": "timestamp", "unit": "milli"},
|
52
|
-
{"name": "key4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
|
52
|
+
{"name": "key4", "type": "list", "field": {"name": "value", "type": "uint64"}},
|
53
53
|
{"name": "key5", "type": "struct", "fields": [
|
54
54
|
{"name": "bar1", "type": "uint64"},
|
55
|
-
{"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}
|
55
|
+
{"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}
|
56
56
|
]}
|
57
57
|
]
|
58
58
|
</buffer>
|
data/fluent-plugin-arrow.gemspec
CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |spec|
|
5
5
|
spec.name = "fluent-plugin-arrow"
|
6
|
-
spec.version = "0.0.3"
|
6
|
+
spec.version = "0.0.4"
|
7
7
|
spec.authors = ["joker1007"]
|
8
8
|
spec.email = ["kakyoin.hierophant@gmail.com"]
|
9
9
|
|
@@ -24,6 +24,6 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "rake", "~> 12.0"
|
25
25
|
spec.add_development_dependency "test-unit", "~> 3.0"
|
26
26
|
spec.add_runtime_dependency "fluentd", [">= 0.14.10", "< 2"]
|
27
|
-
spec.add_runtime_dependency "red-arrow", ">= 0.
|
27
|
+
spec.add_runtime_dependency "red-arrow", ">= 0.12"
|
28
28
|
spec.add_runtime_dependency "red-parquet"
|
29
29
|
end
|
@@ -17,7 +17,6 @@
|
|
17
17
|
require "arrow"
|
18
18
|
require 'fluent/plugin/buf_file'
|
19
19
|
require 'fluent/plugin/buffer/arrow_file_chunk'
|
20
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
21
20
|
|
22
21
|
module Fluent
|
23
22
|
module Plugin
|
@@ -34,11 +33,7 @@ module Fluent
|
|
34
33
|
super
|
35
34
|
|
36
35
|
# [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
|
37
|
-
@
|
38
|
-
h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
|
39
|
-
end
|
40
|
-
|
41
|
-
@arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
|
36
|
+
@arrow_schema = ::Arrow::Schema.new(@schema)
|
42
37
|
end
|
43
38
|
|
44
39
|
def resume
|
@@ -48,9 +43,9 @@ module Fluent
|
|
48
43
|
def generate_chunk(metadata)
|
49
44
|
# FileChunk generates real path with unique_id
|
50
45
|
if @file_permission
|
51
|
-
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
46
|
+
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, perm: @file_permission, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
52
47
|
else
|
53
|
-
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
48
|
+
chunk = Fluent::Plugin::Buffer::ArrowFileChunk.new(metadata, @path, :create, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
54
49
|
end
|
55
50
|
|
56
51
|
log.debug "Created new chunk", chunk_id: dump_unique_id_hex(chunk.unique_id), metadata: metadata
|
@@ -16,7 +16,6 @@
|
|
16
16
|
require "arrow"
|
17
17
|
require 'fluent/plugin/buffer'
|
18
18
|
require 'fluent/plugin/buffer/arrow_memory_chunk'
|
19
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
20
19
|
|
21
20
|
module Fluent
|
22
21
|
module Plugin
|
@@ -33,11 +32,7 @@ module Fluent
|
|
33
32
|
super
|
34
33
|
|
35
34
|
# [{"name" => foo1, "type" => "uint64"}, {"name" => foo2, "type" => "struct", "fields" => [{"name" => bar1, "type" => "string"}]}
|
36
|
-
@
|
37
|
-
h[field["name"]] = Fluent::Plugin::Arrow::FieldWrapper.build(field)
|
38
|
-
end
|
39
|
-
|
40
|
-
@arrow_schema = ::Arrow::Schema.new(@field_wrappers.values.map(&:arrow_field))
|
35
|
+
@arrow_schema = ::Arrow::Schema.new(@schema)
|
41
36
|
end
|
42
37
|
|
43
38
|
def resume
|
@@ -45,7 +40,7 @@ module Fluent
|
|
45
40
|
end
|
46
41
|
|
47
42
|
def generate_chunk(metadata)
|
48
|
-
Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, @field_wrappers, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
43
|
+
Fluent::Plugin::Buffer::ArrowMemoryChunk.new(metadata, @arrow_schema, chunk_size: @row_group_chunk_size, format: @arrow_format)
|
49
44
|
end
|
50
45
|
end
|
51
46
|
end
|
@@ -10,32 +10,11 @@ module Fluent
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def build_arrow_buffer_string
|
13
|
-
|
14
|
-
each_record do |record|
|
15
|
-
count += 1
|
16
|
-
record.each do |k, v|
|
17
|
-
@field_wrappers[k].append(v)
|
18
|
-
end
|
19
|
-
end
|
13
|
+
record_batch = ::Arrow::RecordBatch.new(@schema, each_record)
|
20
14
|
arrow_buf = ::Arrow::ResizableBuffer.new(bytesize * 1.2)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
Parquet::ArrowFileWriter.open(@schema, output) do |writer|
|
25
|
-
columns = @schema.fields.map do |f|
|
26
|
-
::Arrow::Column.new(f, @field_wrappers[f.name].finish)
|
27
|
-
end
|
28
|
-
table = ::Arrow::Table.new(@schema, columns)
|
29
|
-
writer.write_table(table, @chunk_size)
|
30
|
-
end
|
31
|
-
else
|
32
|
-
::Arrow::RecordBatchFileWriter.open(output, @schema) do |writer|
|
33
|
-
record_batch = ::Arrow::RecordBatch.new(@schema, count, @field_wrappers.values.map(&:finish))
|
34
|
-
writer.write_record_batch(record_batch)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
15
|
+
record_batch.to_table.save(arrow_buf,
|
16
|
+
format: @format,
|
17
|
+
chunk_size: @chunk_size)
|
39
18
|
arrow_buf.data.to_s
|
40
19
|
end
|
41
20
|
end
|
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
|
|
19
19
|
require 'fluent/plugin/buffer/chunk'
|
20
20
|
require 'fluent/plugin/buffer/file_chunk'
|
21
21
|
require 'fluent/plugin/buffer/arrow_buffer_string_builder'
|
22
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
23
22
|
|
24
23
|
module Fluent
|
25
24
|
module Plugin
|
@@ -27,10 +26,9 @@ module Fluent
|
|
27
26
|
class ArrowFileChunk < FileChunk
|
28
27
|
include ArrowBufferStringBuilder
|
29
28
|
|
30
|
-
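def initialize(metadata, path, mode, schema, field_wrappers, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)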
|
29
|
+
def initialize(metadata, path, mode, schema, perm: system_config.file_permission || FILE_PERMISSION, chunk_size: 1024, format: :arrow)
|
31
30
|
super(metadata, path, mode, perm: perm, compress: :text)
|
32
31
|
@schema = schema
|
33
|
-
@field_wrappers = field_wrappers
|
34
32
|
@chunk_size = chunk_size
|
35
33
|
@format = format
|
36
34
|
end
|
@@ -19,7 +19,6 @@ require 'fluent/msgpack_factory'
|
|
19
19
|
require 'fluent/plugin/buffer/chunk'
|
20
20
|
require 'fluent/plugin/buffer/memory_chunk'
|
21
21
|
require 'fluent/plugin/buffer/arrow_buffer_string_builder'
|
22
|
-
require 'fluent/plugin/arrow/field_wrapper'
|
23
22
|
|
24
23
|
module Fluent
|
25
24
|
module Plugin
|
@@ -27,10 +26,9 @@ module Fluent
|
|
27
26
|
class ArrowMemoryChunk < MemoryChunk
|
28
27
|
include ArrowBufferStringBuilder
|
29
28
|
|
30
|
-
def initialize(metadata, schema, field_wrappers, chunk_size: 1024, format: :arrow)
|
29
|
+
def initialize(metadata, schema, chunk_size: 1024, format: :arrow)
|
31
30
|
super(metadata, compress: :text)
|
32
31
|
@schema = schema
|
33
|
-
@field_wrappers = field_wrappers
|
34
32
|
@chunk_size = chunk_size
|
35
33
|
@format = format
|
36
34
|
end
|
@@ -25,8 +25,8 @@ class ArrowMemoryBufferTest < Test::Unit::TestCase
|
|
25
25
|
{"name": "foo1", "type": "uint64"},
|
26
26
|
{"name": "foo2", "type": "string"},
|
27
27
|
{"name": "foo3", "type": "timestamp", "unit": "milli"},
|
28
|
-
{"name": "foo4", "type": "list", "value_type": {"name": "value", "type": "uint64"}},
|
29
|
-
{"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "value_type": {"name": "value", "type": "string"}}]}
|
28
|
+
{"name": "foo4", "type": "list", "field": {"name": "value", "type": "uint64"}},
|
29
|
+
{"name": "foo5", "type": "struct", "fields": [{"name": "bar1", "type": "uint64"}, {"name": "bar2", "type": "list", "field": {"name": "value", "type": "string"}}]}
|
30
30
|
]
|
31
31
|
]
|
32
32
|
buffer_conf = Fluent::Config.parse(conf, "(test)", "(test_dir)", syntax: :v1)
|
@@ -9,13 +9,8 @@ class ArrowMemoryChunkTest < Test::Unit::TestCase
|
|
9
9
|
::Arrow::Field.new("key2", :double),
|
10
10
|
::Arrow::Field.new("key3", ::Arrow::TimestampDataType.new(:second)),
|
11
11
|
]
|
12
|
-
field_wrappers = {
|
13
|
-
"key1" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "uint64"}),
|
14
|
-
"key2" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "double"}),
|
15
|
-
"key3" => Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "second"}),
|
16
|
-
}
|
17
12
|
@schema = Arrow::Schema.new(@fields)
|
18
|
-
@c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema, field_wrappers)
|
13
|
+
@c = Fluent::Plugin::Buffer::ArrowMemoryChunk.new(Object.new, @schema)
|
19
14
|
end
|
20
15
|
|
21
16
|
test "can #read" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-arrow
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- joker1007
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -78,14 +78,14 @@ dependencies:
|
|
78
78
|
requirements:
|
79
79
|
- - ">="
|
80
80
|
- !ruby/object:Gem::Version
|
81
|
-
version: '0.
|
81
|
+
version: '0.12'
|
82
82
|
type: :runtime
|
83
83
|
prerelease: false
|
84
84
|
version_requirements: !ruby/object:Gem::Requirement
|
85
85
|
requirements:
|
86
86
|
- - ">="
|
87
87
|
- !ruby/object:Gem::Version
|
88
|
-
version: '0.
|
88
|
+
version: '0.12'
|
89
89
|
- !ruby/object:Gem::Dependency
|
90
90
|
name: red-parquet
|
91
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -107,13 +107,15 @@ executables: []
|
|
107
107
|
extensions: []
|
108
108
|
extra_rdoc_files: []
|
109
109
|
files:
|
110
|
+
- ".dockerignore"
|
110
111
|
- ".gitignore"
|
112
|
+
- ".travis.yml"
|
113
|
+
- Dockerfile
|
111
114
|
- Gemfile
|
112
115
|
- LICENSE
|
113
116
|
- README.md
|
114
117
|
- Rakefile
|
115
118
|
- fluent-plugin-arrow.gemspec
|
116
|
-
- lib/fluent/plugin/arrow/field_wrapper.rb
|
117
119
|
- lib/fluent/plugin/buf_arrow_file.rb
|
118
120
|
- lib/fluent/plugin/buf_arrow_memory.rb
|
119
121
|
- lib/fluent/plugin/buffer/arrow_buffer_string_builder.rb
|
@@ -123,7 +125,6 @@ files:
|
|
123
125
|
- test/helper.rb
|
124
126
|
- test/plugin/test_buf_arrow_memory.rb
|
125
127
|
- test/plugin/test_buffer_arrow_memory_chunk.rb
|
126
|
-
- test/plugin/test_field_wrapper.rb
|
127
128
|
homepage: https://github.com/joker1007/fluent-plugin-arrow
|
128
129
|
licenses:
|
129
130
|
- Apache-2.0
|
@@ -143,8 +144,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
143
144
|
- !ruby/object:Gem::Version
|
144
145
|
version: '0'
|
145
146
|
requirements: []
|
146
|
-
|
147
|
-
rubygems_version: 2.7.8
|
147
|
+
rubygems_version: 3.0.1
|
148
148
|
signing_key:
|
149
149
|
specification_version: 4
|
150
150
|
summary: Apache Arrow formatter plugin for fluentd.
|
@@ -152,4 +152,3 @@ test_files:
|
|
152
152
|
- test/helper.rb
|
153
153
|
- test/plugin/test_buf_arrow_memory.rb
|
154
154
|
- test/plugin/test_buffer_arrow_memory_chunk.rb
|
155
|
-
- test/plugin/test_field_wrapper.rb
|
@@ -1,259 +0,0 @@
|
|
1
|
-
require "arrow"
|
2
|
-
|
3
|
-
module Fluent
|
4
|
-
module Plugin
|
5
|
-
module Arrow
|
6
|
-
class FieldWrapper
|
7
|
-
class << self
|
8
|
-
def build(field)
|
9
|
-
case field["type"]
|
10
|
-
when "string"
|
11
|
-
StringFieldWrapper.new(field)
|
12
|
-
when "int", "int8", "int16", "int32", "int64", "uint", "uint8", "uint16", "uint32", "uint64"
|
13
|
-
IntegerFieldWrapper.new(field)
|
14
|
-
when "float", "double"
|
15
|
-
FloatFieldWrapper.new(field)
|
16
|
-
when "boolean"
|
17
|
-
BooleanFieldWrapper.new(field)
|
18
|
-
when "date32"
|
19
|
-
Date32FieldWrapper.new(field)
|
20
|
-
when "date64"
|
21
|
-
Date64FieldWrapper.new(field)
|
22
|
-
when "timestamp"
|
23
|
-
TimestampFieldWrapper.new(field)
|
24
|
-
when "list"
|
25
|
-
ListFieldWrapper.new(field)
|
26
|
-
when "struct"
|
27
|
-
StructFieldWrapper.new(field)
|
28
|
-
else
|
29
|
-
raise "Unsupported data type"
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
attr_reader :field, :name, :type, :children, :arrow_field, :array_builder
|
35
|
-
|
36
|
-
def initialize(field)
|
37
|
-
@field = field
|
38
|
-
@name = field["name"]
|
39
|
-
@type = field["type"]
|
40
|
-
@children = []
|
41
|
-
|
42
|
-
field["value_type"]&.tap do |f|
|
43
|
-
@children << self.class.build(f)
|
44
|
-
end
|
45
|
-
|
46
|
-
field["fields"]&.each do |f|
|
47
|
-
@children << self.class.build(f)
|
48
|
-
end
|
49
|
-
|
50
|
-
create_arrow_field
|
51
|
-
create_array_builder
|
52
|
-
end
|
53
|
-
|
54
|
-
def append(value)
|
55
|
-
if value.nil?
|
56
|
-
@array_builder.append_null
|
57
|
-
else
|
58
|
-
@array_builder.append(cast_value(value))
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def finish
|
63
|
-
@array_builder.finish
|
64
|
-
end
|
65
|
-
|
66
|
-
def create_arrow_field
|
67
|
-
@arrow_field = ::Arrow::Field.new(name, create_arrow_data_type)
|
68
|
-
end
|
69
|
-
|
70
|
-
def create_arrow_data_type
|
71
|
-
data_type_name = type.to_s.capitalize.gsub(/\AUint/, "UInt")
|
72
|
-
data_type_class_name = "#{data_type_name}DataType"
|
73
|
-
data_type_class = ::Arrow.const_get(data_type_class_name)
|
74
|
-
data_type_class.new
|
75
|
-
end
|
76
|
-
|
77
|
-
def create_array_builder(from_parent = nil)
|
78
|
-
if from_parent
|
79
|
-
@array_builder = from_parent
|
80
|
-
else
|
81
|
-
data_type_str = arrow_field.data_type.to_s
|
82
|
-
data_type_name = data_type_str.capitalize.gsub(/\AUint/, "UInt")
|
83
|
-
array_builder_class_name = "#{data_type_name}ArrayBuilder"
|
84
|
-
array_builder_class = ::Arrow.const_get(array_builder_class_name)
|
85
|
-
@array_builder = array_builder_class.new
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def cast_value(value)
|
90
|
-
raise NotImplementedError
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class StringFieldWrapper < FieldWrapper
|
95
|
-
def cast_value(value)
|
96
|
-
value.to_s
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
class IntegerFieldWrapper < FieldWrapper
|
101
|
-
def cast_value(value)
|
102
|
-
value.to_i
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
class FloatFieldWrapper < FieldWrapper
|
107
|
-
def cast_value(value)
|
108
|
-
value.to_f
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
class BooleanFieldWrapper < FieldWrapper
|
113
|
-
def cast_value(value)
|
114
|
-
!!value
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
require "date"
|
119
|
-
class Date32FieldWrapper < FieldWrapper
|
120
|
-
UNIX_EPOCH = Date.new(1970, 1, 1)
|
121
|
-
def cast_value(value)
|
122
|
-
date =
|
123
|
-
if value.respond_to?(:to_date)
|
124
|
-
value.to_date
|
125
|
-
else
|
126
|
-
Date.parse(value)
|
127
|
-
end
|
128
|
-
|
129
|
-
(date - UNIX_EPOCH).to_i
|
130
|
-
end
|
131
|
-
|
132
|
-
def create_array_builder(from_parent = nil)
|
133
|
-
if from_parent
|
134
|
-
@array_builder = from_parent
|
135
|
-
else
|
136
|
-
@array_builder = ::Arrow::Date32ArrayBuilder.new
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
class Date64FieldWrapper < FieldWrapper
|
142
|
-
UNIX_EPOCH = Date.new(1970, 1, 1)
|
143
|
-
def cast_value(value)
|
144
|
-
time =
|
145
|
-
if value.respond_to?(:to_time)
|
146
|
-
value.to_time
|
147
|
-
else
|
148
|
-
Time.parse(value)
|
149
|
-
end
|
150
|
-
|
151
|
-
time.to_i * 1_000 + time.usec / 1_000
|
152
|
-
end
|
153
|
-
|
154
|
-
def create_array_builder(from_parent = nil)
|
155
|
-
if from_parent
|
156
|
-
@array_builder = from_parent
|
157
|
-
else
|
158
|
-
@array_builder = ::Arrow::Date64ArrayBuilder.new
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
require "time"
|
164
|
-
class TimestampFieldWrapper < FieldWrapper
|
165
|
-
def cast_value(value)
|
166
|
-
value =
|
167
|
-
if value.is_a?(Fluent::EventTime)
|
168
|
-
Time.at(value, value.usec)
|
169
|
-
elsif value.respond_to?(:to_time)
|
170
|
-
value.to_time
|
171
|
-
elsif value.is_a?(String)
|
172
|
-
Time.parse(value)
|
173
|
-
else
|
174
|
-
value
|
175
|
-
end
|
176
|
-
|
177
|
-
return value if value.is_a?(Numeric)
|
178
|
-
|
179
|
-
case field["unit"]
|
180
|
-
when "second"
|
181
|
-
value.to_i
|
182
|
-
when "milli"
|
183
|
-
value.to_i * 1_000 + value.usec / 1_000
|
184
|
-
when "micro"
|
185
|
-
value.to_i * 1_000_000 + value.usec
|
186
|
-
else
|
187
|
-
value.to_i * 1_000_000_000 + value.nsec
|
188
|
-
end
|
189
|
-
end
|
190
|
-
|
191
|
-
def create_arrow_data_type
|
192
|
-
::Arrow::TimestampDataType.new(field["unit"].to_sym)
|
193
|
-
end
|
194
|
-
|
195
|
-
def create_array_builder(from_parent = nil)
|
196
|
-
if from_parent
|
197
|
-
@array_builder = from_parent
|
198
|
-
else
|
199
|
-
@array_builder = ::Arrow::TimestampArrayBuilder.new(arrow_field.data_type)
|
200
|
-
end
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
class ListFieldWrapper < FieldWrapper
|
205
|
-
def append(value)
|
206
|
-
if value.nil?
|
207
|
-
@array_builder.append_null
|
208
|
-
else
|
209
|
-
@array_builder.append
|
210
|
-
value.each do |v|
|
211
|
-
@children[0].append(v)
|
212
|
-
end
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
def create_arrow_data_type
|
217
|
-
::Arrow::ListDataType.new(children[0].arrow_field)
|
218
|
-
end
|
219
|
-
|
220
|
-
def create_array_builder(from_parent = nil)
|
221
|
-
if from_parent
|
222
|
-
@array_builder = from_parent
|
223
|
-
else
|
224
|
-
@array_builder = ::Arrow::ListArrayBuilder.new(arrow_field.data_type)
|
225
|
-
end
|
226
|
-
|
227
|
-
@children.each { |c| c.create_array_builder(@array_builder.value_builder) }
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
class StructFieldWrapper < FieldWrapper
|
232
|
-
def append(value)
|
233
|
-
if value.nil?
|
234
|
-
@array_builder.append_null
|
235
|
-
else
|
236
|
-
@array_builder.append
|
237
|
-
value.each do |k, v|
|
238
|
-
@children.find { |c| c.name == k }.append(v)
|
239
|
-
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
|
243
|
-
def create_arrow_data_type
|
244
|
-
::Arrow::StructDataType.new(children.map(&:arrow_field))
|
245
|
-
end
|
246
|
-
|
247
|
-
def create_array_builder(from_parent = nil)
|
248
|
-
if from_parent
|
249
|
-
@array_builder = from_parent
|
250
|
-
else
|
251
|
-
@array_builder = ::Arrow::StructArrayBuilder.new(arrow_field.data_type)
|
252
|
-
end
|
253
|
-
|
254
|
-
@children.each_with_index { |c, i| c.create_array_builder(@array_builder.get_field_builder(i)) }
|
255
|
-
end
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
@@ -1,145 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
require "fluent/plugin/arrow/field_wrapper"
|
3
|
-
|
4
|
-
class ArrowFieldWrapperTest < Test::Unit::TestCase
|
5
|
-
test ".build (string)" do
|
6
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "string"})
|
7
|
-
assert_equal "key1", field_wrapper.name
|
8
|
-
assert_equal "string", field_wrapper.type
|
9
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
10
|
-
end
|
11
|
-
|
12
|
-
test ".build (timestamp)" do
|
13
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
|
14
|
-
assert_equal "key1", field_wrapper.name
|
15
|
-
assert_equal "timestamp", field_wrapper.type
|
16
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
17
|
-
end
|
18
|
-
|
19
|
-
test ".build (list)" do
|
20
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "list", "value_type" => {"name" => "value", "type" => "string"}})
|
21
|
-
assert_equal "key1", field_wrapper.name
|
22
|
-
assert_equal "list", field_wrapper.type
|
23
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
24
|
-
assert_kind_of Arrow::ListDataType, field_wrapper.arrow_field.data_type
|
25
|
-
assert_kind_of Arrow::ListArrayBuilder, field_wrapper.array_builder
|
26
|
-
|
27
|
-
assert_equal "value", field_wrapper.children[0].name
|
28
|
-
assert_equal "string", field_wrapper.children[0].type
|
29
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
30
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
31
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
32
|
-
end
|
33
|
-
|
34
|
-
test ".build (struct)" do
|
35
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
36
|
-
{"name" => "foo1", "type" => "string"},
|
37
|
-
{"name" => "foo2", "type" => "uint64"},
|
38
|
-
{"name" => "foo3", "type" => "timestamp", "unit" => "milli"},
|
39
|
-
]})
|
40
|
-
assert_equal "key1", field_wrapper.name
|
41
|
-
assert_equal "struct", field_wrapper.type
|
42
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
43
|
-
assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
|
44
|
-
assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
|
45
|
-
|
46
|
-
assert_equal "foo1", field_wrapper.children[0].name
|
47
|
-
assert_equal "string", field_wrapper.children[0].type
|
48
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
49
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
50
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
51
|
-
|
52
|
-
assert_equal "foo2", field_wrapper.children[1].name
|
53
|
-
assert_equal "uint64", field_wrapper.children[1].type
|
54
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
|
55
|
-
assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].arrow_field.data_type
|
56
|
-
assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].array_builder
|
57
|
-
|
58
|
-
assert_equal "foo3", field_wrapper.children[2].name
|
59
|
-
assert_equal "timestamp", field_wrapper.children[2].type
|
60
|
-
assert_kind_of Arrow::Field, field_wrapper.children[2].arrow_field
|
61
|
-
assert_kind_of Arrow::TimestampDataType, field_wrapper.children[2].arrow_field.data_type
|
62
|
-
assert_kind_of Arrow::TimestampArrayBuilder, field_wrapper.children[2].array_builder
|
63
|
-
end
|
64
|
-
|
65
|
-
test ".build (nested)" do
|
66
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
67
|
-
{"name" => "foo1", "type" => "string"},
|
68
|
-
{"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
|
69
|
-
]})
|
70
|
-
assert_equal "key1", field_wrapper.name
|
71
|
-
assert_equal "struct", field_wrapper.type
|
72
|
-
assert_kind_of Arrow::Field, field_wrapper.arrow_field
|
73
|
-
assert_kind_of Arrow::StructDataType, field_wrapper.arrow_field.data_type
|
74
|
-
assert_kind_of Arrow::StructArrayBuilder, field_wrapper.array_builder
|
75
|
-
|
76
|
-
assert_equal "foo1", field_wrapper.children[0].name
|
77
|
-
assert_equal "string", field_wrapper.children[0].type
|
78
|
-
assert_kind_of Arrow::Field, field_wrapper.children[0].arrow_field
|
79
|
-
assert_kind_of Arrow::StringDataType, field_wrapper.children[0].arrow_field.data_type
|
80
|
-
assert_kind_of Arrow::StringArrayBuilder, field_wrapper.children[0].array_builder
|
81
|
-
|
82
|
-
assert_equal "foo2", field_wrapper.children[1].name
|
83
|
-
assert_equal "list", field_wrapper.children[1].type
|
84
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].arrow_field
|
85
|
-
assert_kind_of Arrow::ListDataType, field_wrapper.children[1].arrow_field.data_type
|
86
|
-
assert_kind_of Arrow::ListArrayBuilder, field_wrapper.children[1].array_builder
|
87
|
-
|
88
|
-
assert_equal "value", field_wrapper.children[1].children[0].name
|
89
|
-
assert_equal "uint64", field_wrapper.children[1].children[0].type
|
90
|
-
assert_kind_of Arrow::Field, field_wrapper.children[1].children[0].arrow_field
|
91
|
-
assert_kind_of Arrow::UInt64DataType, field_wrapper.children[1].children[0].arrow_field.data_type
|
92
|
-
assert_kind_of Arrow::UInt64ArrayBuilder, field_wrapper.children[1].children[0].array_builder
|
93
|
-
end
|
94
|
-
|
95
|
-
test "#append (timestamp)" do
|
96
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "timestamp", "unit" => "nano"})
|
97
|
-
time = Time.now
|
98
|
-
field_wrapper.append(time)
|
99
|
-
timestamp_array = field_wrapper.finish
|
100
|
-
assert_kind_of Time, timestamp_array[0]
|
101
|
-
assert_equal time.to_i, timestamp_array[0].to_i
|
102
|
-
end
|
103
|
-
|
104
|
-
test "#append (date32)" do
|
105
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date32"})
|
106
|
-
date = Date.today
|
107
|
-
field_wrapper.append(date)
|
108
|
-
date_array = field_wrapper.finish
|
109
|
-
assert_kind_of Date, date_array[0]
|
110
|
-
assert_equal date, date_array[0]
|
111
|
-
end
|
112
|
-
|
113
|
-
test "#append (date64)" do
|
114
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "date64"})
|
115
|
-
date = Date.today
|
116
|
-
field_wrapper.append(date)
|
117
|
-
date_array = field_wrapper.finish
|
118
|
-
assert_kind_of DateTime, date_array[0]
|
119
|
-
assert_equal date, date_array[0].to_date
|
120
|
-
end
|
121
|
-
|
122
|
-
test "#append (nested)" do
|
123
|
-
field_wrapper = Fluent::Plugin::Arrow::FieldWrapper.build({"name" => "key1", "type" => "struct", "fields" => [
|
124
|
-
{"name" => "foo1", "type" => "string"},
|
125
|
-
{"name" => "foo2", "type" => "list", "value_type" => {"name" => "value", "type" => "uint64"}},
|
126
|
-
]})
|
127
|
-
|
128
|
-
field_wrapper.append({"foo1" => "rec1", "foo2" => [1, 2, 3]})
|
129
|
-
field_wrapper.append({"foo1" => "rec2", "foo2" => [4, 5]})
|
130
|
-
|
131
|
-
struct_array = field_wrapper.finish
|
132
|
-
assert_kind_of Arrow::StringArray, struct_array.fields[0]
|
133
|
-
assert_equal "rec1", struct_array.fields[0][0]
|
134
|
-
assert_equal "rec2", struct_array.fields[0][1]
|
135
|
-
|
136
|
-
assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(0)
|
137
|
-
assert_equal 1, struct_array.fields[1].get_value(0)[0]
|
138
|
-
assert_equal 2, struct_array.fields[1].get_value(0)[1]
|
139
|
-
assert_equal 3, struct_array.fields[1].get_value(0)[2]
|
140
|
-
|
141
|
-
assert_kind_of Arrow::UInt64Array, struct_array.fields[1].get_value(1)
|
142
|
-
assert_equal 4, struct_array.fields[1].get_value(1)[0]
|
143
|
-
assert_equal 5, struct_array.fields[1].get_value(1)[1]
|
144
|
-
end
|
145
|
-
end
|