red-arrow 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/lib/arrow/array-builder.rb +6 -2
- data/lib/arrow/array.rb +6 -2
- data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
- data/lib/arrow/csv-loader.rb +102 -2
- data/lib/arrow/csv-read-options.rb +25 -0
- data/lib/arrow/data-type.rb +135 -0
- data/lib/arrow/decimal128-array-builder.rb +64 -0
- data/lib/arrow/decimal128-data-type.rb +69 -0
- data/lib/arrow/dense-union-data-type.rb +90 -0
- data/lib/arrow/dictionary-data-type.rb +106 -0
- data/lib/arrow/field-containable.rb +35 -0
- data/lib/arrow/field.rb +92 -8
- data/lib/arrow/file-output-stream.rb +34 -0
- data/lib/arrow/list-array-builder.rb +96 -0
- data/lib/arrow/list-data-type.rb +68 -0
- data/lib/arrow/loader.rb +30 -5
- data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
- data/lib/arrow/record-batch-builder.rb +115 -0
- data/lib/arrow/record-batch.rb +25 -0
- data/lib/arrow/schema.rb +97 -0
- data/lib/arrow/sparse-union-data-type.rb +90 -0
- data/lib/arrow/struct-array-builder.rb +146 -0
- data/lib/arrow/struct-array.rb +34 -0
- data/lib/arrow/struct-data-type.rb +130 -0
- data/lib/arrow/struct.rb +68 -0
- data/lib/arrow/table-loader.rb +65 -25
- data/lib/arrow/table-saver.rb +73 -24
- data/lib/arrow/table.rb +11 -2
- data/lib/arrow/time32-data-type.rb +61 -0
- data/lib/arrow/time64-data-type.rb +61 -0
- data/lib/arrow/timestamp-data-type.rb +57 -0
- data/lib/arrow/version.rb +5 -7
- data/lib/arrow/writable.rb +22 -0
- data/red-arrow.gemspec +8 -4
- data/test/helper.rb +1 -2
- data/test/test-csv-loader.rb +27 -0
- data/test/test-data-type.rb +47 -0
- data/test/test-decimal128-array-builder.rb +95 -0
- data/test/test-decimal128-array.rb +38 -0
- data/test/test-decimal128-data-type.rb +31 -0
- data/test/test-dense-union-data-type.rb +41 -0
- data/test/test-dictionary-data-type.rb +40 -0
- data/test/test-feather.rb +34 -0
- data/test/test-field.rb +71 -0
- data/test/test-file-output-stream.rb +54 -0
- data/test/test-list-array-builder.rb +79 -0
- data/test/test-list-array.rb +32 -0
- data/test/test-list-data-type.rb +43 -0
- data/test/test-record-batch-builder.rb +116 -0
- data/test/test-record-batch.rb +82 -27
- data/test/test-schema.rb +104 -0
- data/test/test-sparse-union-data-type.rb +41 -0
- data/test/test-struct-array-builder.rb +180 -0
- data/test/test-struct-array.rb +60 -15
- data/test/test-struct-data-type.rb +112 -0
- data/test/test-struct.rb +81 -0
- data/test/test-table.rb +165 -29
- data/test/test-time32-data-type.rb +42 -0
- data/test/test-time64-data-type.rb +42 -0
- data/test/test-timestamp-data-type.rb +42 -0
- metadata +99 -10
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
module Arrow
  # Ruby-level conveniences layered on top of the list array builder.
  class ListArrayBuilder
    class << self
      # Creates a builder for `data_type` and delegates to its `#build`
      # with `values`.
      #
      # @param data_type The data type handed to the constructor.
      # @param values The values handed to the builder's `#build`.
      # @return The result of the builder's `#build`.
      def build(data_type, values)
        new(data_type).build(values)
      end
    end

    # Keep the original zero-argument implementation reachable.
    alias_method :append_value_raw, :append_value

    # @overload append_value
    #
    #   Starts appending a list record. You also need to append list
    #   value by {#value_builder}.
    #
    # @overload append_value(list)
    #
    #   Appends a list record including list value.
    #
    #   @param list [nil, ::Array] The list value of the record.
    #
    #     If this is `nil`, the list record is null.
    #
    #     If this is `Array`, it's the list value of the record.
    #
    # @raise [ArgumentError] If more than one argument is given, or the
    #   single argument is neither `nil` nor an `Array`.
    #
    # @since 0.12.0
    def append_value(*args)
      n_args = args.size
      return append_value_raw if n_args.zero?

      unless n_args == 1
        message = "wrong number of arguments (given #{n_args}, expected 0..1)"
        raise ArgumentError, message
      end

      list = args.first
      case list
      when nil
        append_null
      when ::Array
        # Open the list record first, then fill it through the
        # (memoized) value builder.
        append_value_raw
        @value_builder ||= value_builder
        @value_builder.append_values(list, nil)
      else
        message = "list value must be nil or Array: #{list.inspect}"
        raise ArgumentError, message
      end
    end

    # Appends several list records.
    #
    # @param lists [::Array<nil, ::Array>] The list values to append.
    # @param is_valids [nil, ::Array<Boolean>] When given, it drives the
    #   iteration: a falsy entry appends a null record, a truthy entry
    #   appends the list at the same index.
    def append_values(lists, is_valids=nil)
      return lists.each { |list| append_value(list) } unless is_valids

      is_valids.each_with_index do |is_valid, index|
        is_valid ? append_value(lists[index]) : append_null
      end
    end

    # Appends the given values; with no values it starts a list record.
    #
    # @since 0.12.0
    def append(*values)
      return super unless values.empty?

      # For backward compatibility
      append_value
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
module Arrow
  class ListDataType
    # Keep the original constructor reachable for the wrapper below.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # Creates a new {Arrow::ListDataType}.
    #
    # @overload initialize(field)
    #
    #   @param field [Arrow::Field, Hash] The field of the list data
    #     type. A `Hash` is treated as a field description and passed
    #     to {Arrow::Field.new}.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(name: "visible", type: :boolean)
    #
    # @overload initialize(description)
    #
    #   @param description [Hash] The description of the list data
    #     type. It must have `:field` value.
    #
    #   @option description [Arrow::Field, Hash] :field The field of
    #     the list data type. A `Hash` is treated as a field
    #     description and passed to {Arrow::Field.new}.
    #
    #   @example Create a list data type with {Arrow::Field}
    #     visible_field = Arrow::Field.new("visible", :boolean)
    #     Arrow::ListDataType.new(field: visible_field)
    #
    #   @example Create a list data type with field description
    #     Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
    def initialize(field)
      # A Hash carrying :field is a data type description: unwrap it.
      field = field[:field] if field.is_a?(Hash) && field.key?(:field)
      # Any remaining Hash is a field description.
      field = Field.new(field) if field.is_a?(Hash)
      initialize_raw(field)
    end
  end
end
|
data/lib/arrow/loader.rb
CHANGED
|
@@ -35,18 +35,35 @@ module Arrow
|
|
|
35
35
|
require "arrow/array-builder"
|
|
36
36
|
require "arrow/chunked-array"
|
|
37
37
|
require "arrow/column"
|
|
38
|
+
require "arrow/compression-type"
|
|
38
39
|
require "arrow/csv-loader"
|
|
39
|
-
require "arrow/csv-
|
|
40
|
+
require "arrow/csv-read-options"
|
|
41
|
+
require "arrow/data-type"
|
|
40
42
|
require "arrow/date32-array"
|
|
41
43
|
require "arrow/date32-array-builder"
|
|
42
44
|
require "arrow/date64-array"
|
|
43
45
|
require "arrow/date64-array-builder"
|
|
46
|
+
require "arrow/decimal128-array-builder"
|
|
47
|
+
require "arrow/decimal128-data-type"
|
|
48
|
+
require "arrow/dense-union-data-type"
|
|
49
|
+
require "arrow/dictionary-data-type"
|
|
44
50
|
require "arrow/field"
|
|
51
|
+
require "arrow/file-output-stream"
|
|
52
|
+
require "arrow/list-array-builder"
|
|
53
|
+
require "arrow/list-data-type"
|
|
54
|
+
require "arrow/path-extension"
|
|
45
55
|
require "arrow/record"
|
|
46
56
|
require "arrow/record-batch"
|
|
57
|
+
require "arrow/record-batch-builder"
|
|
58
|
+
require "arrow/record-batch-file-reader"
|
|
59
|
+
require "arrow/record-batch-stream-reader"
|
|
47
60
|
require "arrow/rolling-window"
|
|
61
|
+
require "arrow/schema"
|
|
48
62
|
require "arrow/slicer"
|
|
63
|
+
require "arrow/sparse-union-data-type"
|
|
49
64
|
require "arrow/struct-array"
|
|
65
|
+
require "arrow/struct-array-builder"
|
|
66
|
+
require "arrow/struct-data-type"
|
|
50
67
|
require "arrow/table"
|
|
51
68
|
require "arrow/table-formatter"
|
|
52
69
|
require "arrow/table-list-formatter"
|
|
@@ -54,11 +71,12 @@ module Arrow
|
|
|
54
71
|
require "arrow/table-loader"
|
|
55
72
|
require "arrow/table-saver"
|
|
56
73
|
require "arrow/tensor"
|
|
74
|
+
require "arrow/time32-data-type"
|
|
75
|
+
require "arrow/time64-data-type"
|
|
57
76
|
require "arrow/timestamp-array"
|
|
58
77
|
require "arrow/timestamp-array-builder"
|
|
59
|
-
|
|
60
|
-
require "arrow/
|
|
61
|
-
require "arrow/record-batch-stream-reader"
|
|
78
|
+
require "arrow/timestamp-data-type"
|
|
79
|
+
require "arrow/writable"
|
|
62
80
|
end
|
|
63
81
|
|
|
64
82
|
def load_object_info(info)
|
|
@@ -72,6 +90,13 @@ module Arrow
|
|
|
72
90
|
|
|
73
91
|
def load_method_info(info, klass, method_name)
|
|
74
92
|
case klass.name
|
|
93
|
+
when /Builder\z/
|
|
94
|
+
case method_name
|
|
95
|
+
when "append"
|
|
96
|
+
return
|
|
97
|
+
else
|
|
98
|
+
super
|
|
99
|
+
end
|
|
75
100
|
when "Arrow::StringArray"
|
|
76
101
|
case method_name
|
|
77
102
|
when "get_value"
|
|
@@ -87,7 +112,7 @@ module Arrow
|
|
|
87
112
|
end
|
|
88
113
|
super(info, klass, method_name)
|
|
89
114
|
else
|
|
90
|
-
|
|
115
|
+
super
|
|
91
116
|
end
|
|
92
117
|
end
|
|
93
118
|
end
|
|
@@ -15,40 +15,31 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
require "csv"
|
|
19
|
-
|
|
20
18
|
module Arrow
  # Derives format and compression hints from the extension(s) of a
  # path's base name.
  class PathExtension
    # @param path [String] The path whose base name is inspected.
    def initialize(path)
      @path = path
    end

    # Splits the base name on "." and interprets the trailing
    # components.
    #
    # @return [Hash] `{}` when the base name has no extension.
    #   `{format:, compression:}` when there are at least two
    #   extensions and the last one is recognized by
    #   `CompressionType.resolve_extension`. `{format:}` (the
    #   downcased last extension) otherwise.
    def extract
      basename = ::File.basename(@path)
      components = basename.split(".")
      # "", "." and ".." split into zero components, so guard against
      # anything shorter than "name.ext" rather than only size == 1.
      return {} if components.size < 2

      extension = components.last.downcase
      if components.size > 2
        # Only "name.format.compression" style names are checked for a
        # compression suffix.
        compression = CompressionType.resolve_extension(extension)
        if compression
          return {
            format: components[-2].downcase,
            compression: compression,
          }
        end
      end
      {format: extension}
    end
  end
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
module Arrow
  # Ruby-level conveniences for the record batch builder: construction
  # from a schema description, column builder lookup by name or index,
  # and appending data given as records or as columns.
  class RecordBatchBuilder
    class << self
      # Creates a builder for `schema`, appends `data` and returns the
      # result of `#flush`.
      #
      # @param schema [Arrow::Schema, Object] See {#initialize}.
      # @param data [Hash, Enumerable] See {#append}.
      # @return The result of `#flush`.
      #
      # @since 0.12.0
      def build(schema, data)
        builder = new(schema)
        builder.append(data)
        builder.flush
      end
    end

    # Keep the original constructor reachable for the wrapper below.
    alias_method :initialize_raw, :initialize
    private :initialize_raw

    # @param schema [Arrow::Schema, Object] The schema. Anything that
    #   is not an {Arrow::Schema} is passed to {Arrow::Schema.new}.
    def initialize(schema)
      schema = Schema.new(schema) unless schema.is_a?(Schema)
      initialize_raw(schema)
      # Field name => column index, used to resolve names given as
      # String or Symbol.
      @name_to_index = {}
      schema.fields.each_with_index do |field, i|
        @name_to_index[field.name] = i
      end
    end

    # Looks up a column builder.
    #
    # @param name_or_index [String, Symbol, Integer] A field name or a
    #   column index.
    # @return The column builder at the resolved index.
    #
    # @since 0.12.0
    def [](name_or_index)
      case name_or_index
      when String, Symbol
        name = name_or_index
        self[resolve_name(name)]
      else
        index = name_or_index
        column_builders[index]
      end
    end

    # Appends each given value: a `Hash` is treated as columns (see
    # {#append_columns}), anything else as records (see
    # {#append_records}).
    #
    # @since 0.12.0
    def append(*values)
      values.each do |value|
        case value
        when Hash
          append_columns(value)
        else
          append_records(value)
        end
      end
    end

    # Appends row-oriented data.
    #
    # @param records [Enumerable] Each record is one of:
    #
    #   * `nil`: every column gets `nil`.
    #   * `Hash`: keys are field names (String or Symbol). Unknown
    #     names are ignored, missing fields get `nil`, and when
    #     several keys resolve to the same field the last one wins.
    #   * Anything else responding to `each_with_index`: values are
    #     assigned by position and missing trailing fields get `nil`.
    #
    # @raise [ArgumentError] If a positional record has more values
    #   than the builder has fields.
    #
    # @since 0.12.0
    def append_records(records)
      n = n_fields
      columns = Array.new(n) { [] }
      records.each do |record|
        # Build a fixed-width row first so every column receives
        # exactly one value per record.
        row = Array.new(n)
        case record
        when nil
          # Null record: leave every cell nil.
        when Hash
          record.each do |name, value|
            nth_column = resolve_name(name)
            next if nth_column.nil?
            row[nth_column] = value
          end
        else
          record.each_with_index do |value, nth_column|
            if nth_column >= n
              message = "record has more values than fields " +
                        "(expected at most #{n}): #{record.inspect}"
              raise ArgumentError, message
            end
            row[nth_column] = value
          end
        end
        row.each_with_index do |value, nth_column|
          columns[nth_column] << value
        end
      end
      columns.each_with_index do |column, i|
        # Calling #append with no values is not a no-op for every
        # builder (list builders start a new record), so skip it.
        next if column.empty?
        self[i].append(*column)
      end
    end

    # Appends column-oriented data.
    #
    # @param columns [Hash] Field name or index => values for that
    #   column.
    #
    # @since 0.12.0
    def append_columns(columns)
      columns.each do |name, values|
        values = [*values]
        # See #append_records: never call #append with no values.
        next if values.empty?
        self[name].append(*values)
      end
    end

    private
    # Resolves a field name (String or Symbol) to its column index, or
    # `nil` when the schema has no such field.
    def resolve_name(name)
      @name_to_index[name.to_s]
    end

    # TODO: Make public with good name. Is column_builders good enough?
    # builders? sub_builders?
    #
    # Memoized result of `get_field(i)` for every field index.
    def column_builders
      @column_builders ||= n_fields.times.collect do |i|
        get_field(i)
      end
    end
  end
end
|
data/lib/arrow/record-batch.rb
CHANGED
|
@@ -22,6 +22,22 @@ module Arrow
|
|
|
22
22
|
include RecordContainable
|
|
23
23
|
include Enumerable
|
|
24
24
|
|
|
25
|
+
class << self
  # Dispatches on the number of arguments:
  #
  # * two arguments (`schema`, `data`): the record batch is built by
  #   {Arrow::RecordBatchBuilder.build}.
  # * three arguments: handled by the original constructor.
  #
  # @raise [ArgumentError] For any other number of arguments.
  def new(*args)
    n_args = args.size
    return RecordBatchBuilder.build(*args) if n_args == 2
    return super if n_args == 3

    message = "wrong number of arguments (given #{n_args}, expected 2..3)"
    raise ArgumentError, message
  end
end
|
|
40
|
+
|
|
25
41
|
alias_method :each, :each_record
|
|
26
42
|
|
|
27
43
|
alias_method :columns_raw, :columns
|
|
@@ -29,6 +45,15 @@ module Arrow
|
|
|
29
45
|
@columns ||= columns_raw
|
|
30
46
|
end
|
|
31
47
|
|
|
48
|
+
# Converts the record batch to {Arrow::Table}: a new table is created
# from this record batch's schema with this record batch as its only
# record batch.
#
# @return [Arrow::Table]
#
# @since 0.12.0
def to_table
  record_batches = [self]
  Table.new(schema, record_batches)
end
|
|
56
|
+
|
|
32
57
|
def respond_to_missing?(name, include_private)
|
|
33
58
|
return true if find_column(name)
|
|
34
59
|
super
|