red-arrow 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +4 -0
- data/lib/arrow/array-builder.rb +6 -2
- data/lib/arrow/array.rb +6 -2
- data/{test/test-csv-reader.rb → lib/arrow/compression-type.rb} +16 -13
- data/lib/arrow/csv-loader.rb +102 -2
- data/lib/arrow/csv-read-options.rb +25 -0
- data/lib/arrow/data-type.rb +135 -0
- data/lib/arrow/decimal128-array-builder.rb +64 -0
- data/lib/arrow/decimal128-data-type.rb +69 -0
- data/lib/arrow/dense-union-data-type.rb +90 -0
- data/lib/arrow/dictionary-data-type.rb +106 -0
- data/lib/arrow/field-containable.rb +35 -0
- data/lib/arrow/field.rb +92 -8
- data/lib/arrow/file-output-stream.rb +34 -0
- data/lib/arrow/list-array-builder.rb +96 -0
- data/lib/arrow/list-data-type.rb +68 -0
- data/lib/arrow/loader.rb +30 -5
- data/lib/arrow/{csv-reader.rb → path-extension.rb} +19 -28
- data/lib/arrow/record-batch-builder.rb +115 -0
- data/lib/arrow/record-batch.rb +25 -0
- data/lib/arrow/schema.rb +97 -0
- data/lib/arrow/sparse-union-data-type.rb +90 -0
- data/lib/arrow/struct-array-builder.rb +146 -0
- data/lib/arrow/struct-array.rb +34 -0
- data/lib/arrow/struct-data-type.rb +130 -0
- data/lib/arrow/struct.rb +68 -0
- data/lib/arrow/table-loader.rb +65 -25
- data/lib/arrow/table-saver.rb +73 -24
- data/lib/arrow/table.rb +11 -2
- data/lib/arrow/time32-data-type.rb +61 -0
- data/lib/arrow/time64-data-type.rb +61 -0
- data/lib/arrow/timestamp-data-type.rb +57 -0
- data/lib/arrow/version.rb +5 -7
- data/lib/arrow/writable.rb +22 -0
- data/red-arrow.gemspec +8 -4
- data/test/helper.rb +1 -2
- data/test/test-csv-loader.rb +27 -0
- data/test/test-data-type.rb +47 -0
- data/test/test-decimal128-array-builder.rb +95 -0
- data/test/test-decimal128-array.rb +38 -0
- data/test/test-decimal128-data-type.rb +31 -0
- data/test/test-dense-union-data-type.rb +41 -0
- data/test/test-dictionary-data-type.rb +40 -0
- data/test/test-feather.rb +34 -0
- data/test/test-field.rb +71 -0
- data/test/test-file-output-stream.rb +54 -0
- data/test/test-list-array-builder.rb +79 -0
- data/test/test-list-array.rb +32 -0
- data/test/test-list-data-type.rb +43 -0
- data/test/test-record-batch-builder.rb +116 -0
- data/test/test-record-batch.rb +82 -27
- data/test/test-schema.rb +104 -0
- data/test/test-sparse-union-data-type.rb +41 -0
- data/test/test-struct-array-builder.rb +180 -0
- data/test/test-struct-array.rb +60 -15
- data/test/test-struct-data-type.rb +112 -0
- data/test/test-struct.rb +81 -0
- data/test/test-table.rb +165 -29
- data/test/test-time32-data-type.rb +42 -0
- data/test/test-time64-data-type.rb +42 -0
- data/test/test-timestamp-data-type.rb +42 -0
- metadata +99 -10
@@ -0,0 +1,96 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class ListArrayBuilder
|
20
|
+
class << self
|
21
|
+
def build(data_type, values)
|
22
|
+
builder = new(data_type)
|
23
|
+
builder.build(values)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
alias_method :append_value_raw, :append_value
|
28
|
+
|
29
|
+
# @overload append_value
|
30
|
+
#
|
31
|
+
# Starts appending a list record. You also need to append list
|
32
|
+
# value by {#value_builder}.
|
33
|
+
#
|
34
|
+
# @overload append_value(list)
|
35
|
+
#
|
36
|
+
# Appends a list record including list value.
|
37
|
+
#
|
38
|
+
# @param list [nil, ::Array] The list value of the record.
|
39
|
+
#
|
40
|
+
# If this is `nil`, the list record is null.
|
41
|
+
#
|
42
|
+
# If this is `Array`, it's the list value of the record.
|
43
|
+
#
|
44
|
+
# @since 0.12.0
|
45
|
+
def append_value(*args)
|
46
|
+
n_args = args.size
|
47
|
+
|
48
|
+
case n_args
|
49
|
+
when 0
|
50
|
+
append_value_raw
|
51
|
+
when 1
|
52
|
+
value = args[0]
|
53
|
+
case value
|
54
|
+
when nil
|
55
|
+
append_null
|
56
|
+
when ::Array
|
57
|
+
append_value_raw
|
58
|
+
@value_builder ||= value_builder
|
59
|
+
@value_builder.append_values(value, nil)
|
60
|
+
else
|
61
|
+
message = "list value must be nil or Array: #{value.inspect}"
|
62
|
+
raise ArgumentError, message
|
63
|
+
end
|
64
|
+
else
|
65
|
+
message = "wrong number of arguments (given #{n_args}, expected 0..1)"
|
66
|
+
raise ArgumentError, message
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def append_values(lists, is_valids=nil)
|
71
|
+
if is_valids
|
72
|
+
is_valids.each_with_index do |is_valid, i|
|
73
|
+
if is_valid
|
74
|
+
append_value(lists[i])
|
75
|
+
else
|
76
|
+
append_null
|
77
|
+
end
|
78
|
+
end
|
79
|
+
else
|
80
|
+
lists.each do |list|
|
81
|
+
append_value(list)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# @since 0.12.0
|
87
|
+
def append(*values)
|
88
|
+
if values.empty?
|
89
|
+
# For backward compatibility
|
90
|
+
append_value
|
91
|
+
else
|
92
|
+
super
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class ListDataType
|
20
|
+
alias_method :initialize_raw, :initialize
|
21
|
+
private :initialize_raw
|
22
|
+
|
23
|
+
# Creates a new {Arrow::ListDataType}.
|
24
|
+
#
|
25
|
+
# @overload initialize(field)
|
26
|
+
#
|
27
|
+
# @param field [Arrow::Field, Hash] The field of the list data
|
28
|
+
# type. You can also specify field description by `Hash`.
|
29
|
+
#
|
30
|
+
# See {Arrow::Field.new} for how to specify a field description.
|
31
|
+
#
|
32
|
+
# @example Create a list data type with {Arrow::Field}
|
33
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
34
|
+
# Arrow::ListDataType.new(visible_field)
|
35
|
+
#
|
36
|
+
# @example Create a list data type with field description
|
37
|
+
# Arrow::ListDataType.new(name: "visible", type: :boolean)
|
38
|
+
#
|
39
|
+
# @overload initialize(description)
|
40
|
+
#
|
41
|
+
# @param description [Hash] The description of the list data
|
42
|
+
# type. It must have `:field` value.
|
43
|
+
#
|
44
|
+
# @option description [Arrow::Field, Hash] :field The field of
|
45
|
+
# the list data type. You can also specify field description
|
46
|
+
# by `Hash`.
|
47
|
+
#
|
48
|
+
# See {Arrow::Field.new} for how to specify a field description.
|
49
|
+
#
|
50
|
+
# @example Create a list data type with {Arrow::Field}
|
51
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
52
|
+
# Arrow::ListDataType.new(field: visible_field)
|
53
|
+
#
|
54
|
+
# @example Create a list data type with field description
|
55
|
+
# Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
|
56
|
+
def initialize(field)
|
57
|
+
if field.is_a?(Hash) and field.key?(:field)
|
58
|
+
description = field
|
59
|
+
field = description[:field]
|
60
|
+
end
|
61
|
+
if field.is_a?(Hash)
|
62
|
+
field_description = field
|
63
|
+
field = Field.new(field_description)
|
64
|
+
end
|
65
|
+
initialize_raw(field)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
data/lib/arrow/loader.rb
CHANGED
@@ -35,18 +35,35 @@ module Arrow
|
|
35
35
|
require "arrow/array-builder"
|
36
36
|
require "arrow/chunked-array"
|
37
37
|
require "arrow/column"
|
38
|
+
require "arrow/compression-type"
|
38
39
|
require "arrow/csv-loader"
|
39
|
-
require "arrow/csv-reader"
|
40
|
+
require "arrow/csv-read-options"
|
41
|
+
require "arrow/data-type"
|
40
42
|
require "arrow/date32-array"
|
41
43
|
require "arrow/date32-array-builder"
|
42
44
|
require "arrow/date64-array"
|
43
45
|
require "arrow/date64-array-builder"
|
46
|
+
require "arrow/decimal128-array-builder"
|
47
|
+
require "arrow/decimal128-data-type"
|
48
|
+
require "arrow/dense-union-data-type"
|
49
|
+
require "arrow/dictionary-data-type"
|
44
50
|
require "arrow/field"
|
51
|
+
require "arrow/file-output-stream"
|
52
|
+
require "arrow/list-array-builder"
|
53
|
+
require "arrow/list-data-type"
|
54
|
+
require "arrow/path-extension"
|
45
55
|
require "arrow/record"
|
46
56
|
require "arrow/record-batch"
|
57
|
+
require "arrow/record-batch-builder"
|
58
|
+
require "arrow/record-batch-file-reader"
|
59
|
+
require "arrow/record-batch-stream-reader"
|
47
60
|
require "arrow/rolling-window"
|
61
|
+
require "arrow/schema"
|
48
62
|
require "arrow/slicer"
|
63
|
+
require "arrow/sparse-union-data-type"
|
49
64
|
require "arrow/struct-array"
|
65
|
+
require "arrow/struct-array-builder"
|
66
|
+
require "arrow/struct-data-type"
|
50
67
|
require "arrow/table"
|
51
68
|
require "arrow/table-formatter"
|
52
69
|
require "arrow/table-list-formatter"
|
@@ -54,11 +71,12 @@ module Arrow
|
|
54
71
|
require "arrow/table-loader"
|
55
72
|
require "arrow/table-saver"
|
56
73
|
require "arrow/tensor"
|
74
|
+
require "arrow/time32-data-type"
|
75
|
+
require "arrow/time64-data-type"
|
57
76
|
require "arrow/timestamp-array"
|
58
77
|
require "arrow/timestamp-array-builder"
|
59
|
-
|
60
|
-
require "arrow/record-batch-file-reader"
|
61
|
-
require "arrow/record-batch-stream-reader"
|
78
|
+
require "arrow/timestamp-data-type"
|
79
|
+
require "arrow/writable"
|
62
80
|
end
|
63
81
|
|
64
82
|
def load_object_info(info)
|
@@ -72,6 +90,13 @@ module Arrow
|
|
72
90
|
|
73
91
|
def load_method_info(info, klass, method_name)
|
74
92
|
case klass.name
|
93
|
+
when /Builder\z/
|
94
|
+
case method_name
|
95
|
+
when "append"
|
96
|
+
return
|
97
|
+
else
|
98
|
+
super
|
99
|
+
end
|
75
100
|
when "Arrow::StringArray"
|
76
101
|
case method_name
|
77
102
|
when "get_value"
|
@@ -87,7 +112,7 @@ module Arrow
|
|
87
112
|
end
|
88
113
|
super(info, klass, method_name)
|
89
114
|
else
|
90
|
-
|
115
|
+
super
|
91
116
|
end
|
92
117
|
end
|
93
118
|
end
|
@@ -15,40 +15,31 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
require "csv"
|
19
|
-
|
20
18
|
module Arrow
|
21
|
-
class
|
22
|
-
def initialize(
|
23
|
-
@
|
19
|
+
class PathExtension
|
20
|
+
def initialize(path)
|
21
|
+
@path = path
|
24
22
|
end
|
25
23
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
row = row.collect(&:last)
|
31
|
-
end
|
32
|
-
row.each_with_index do |value, i|
|
33
|
-
values = (values_set[i] ||= [])
|
34
|
-
values << value
|
35
|
-
end
|
36
|
-
end
|
37
|
-
return nil if values_set.empty?
|
24
|
+
def extract
|
25
|
+
basename = ::File.basename(@path)
|
26
|
+
components = basename.split(".")
|
27
|
+
return {} if components.size == 1
|
38
28
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
29
|
+
extension = components.last.downcase
|
30
|
+
if components.size > 2
|
31
|
+
compression = CompressionType.resolve_extension(extension)
|
32
|
+
if compression
|
33
|
+
{
|
34
|
+
format: components[-2].downcase,
|
35
|
+
compression: compression,
|
36
|
+
}
|
37
|
+
else
|
38
|
+
{format: extension}
|
39
|
+
end
|
44
40
|
else
|
45
|
-
|
46
|
-
end
|
47
|
-
raw_table = {}
|
48
|
-
names.each_with_index do |name, i|
|
49
|
-
raw_table[name] = arrays[i]
|
41
|
+
{format: extension}
|
50
42
|
end
|
51
|
-
Table.new(raw_table)
|
52
43
|
end
|
53
44
|
end
|
54
45
|
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class RecordBatchBuilder
|
20
|
+
class << self
|
21
|
+
# @since 0.12.0
|
22
|
+
def build(schema, data)
|
23
|
+
builder = new(schema)
|
24
|
+
builder.append(data)
|
25
|
+
builder.flush
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
alias_method :initialize_raw, :initialize
|
30
|
+
private :initialize_raw
|
31
|
+
def initialize(schema)
|
32
|
+
unless schema.is_a?(Schema)
|
33
|
+
schema = Schema.new(schema)
|
34
|
+
end
|
35
|
+
initialize_raw(schema)
|
36
|
+
@name_to_index = {}
|
37
|
+
schema.fields.each_with_index do |field, i|
|
38
|
+
@name_to_index[field.name] = i
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# @since 0.12.0
|
43
|
+
def [](name_or_index)
|
44
|
+
case name_or_index
|
45
|
+
when String, Symbol
|
46
|
+
name = name_or_index
|
47
|
+
self[resolve_name(name)]
|
48
|
+
else
|
49
|
+
index = name_or_index
|
50
|
+
column_builders[index]
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
# @since 0.12.0
|
55
|
+
def append(*values)
|
56
|
+
values.each do |value|
|
57
|
+
case value
|
58
|
+
when Hash
|
59
|
+
append_columns(value)
|
60
|
+
else
|
61
|
+
append_records(value)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# @since 0.12.0
|
67
|
+
def append_records(records)
|
68
|
+
n = n_fields
|
69
|
+
columns = n.times.collect do
|
70
|
+
[]
|
71
|
+
end
|
72
|
+
records.each_with_index do |record, nth_record|
|
73
|
+
case record
|
74
|
+
when nil
|
75
|
+
when Hash
|
76
|
+
record.each do |name, value|
|
77
|
+
nth_column = resolve_name(name)
|
78
|
+
next if nth_column.nil?
|
79
|
+
columns[nth_column] << value
|
80
|
+
end
|
81
|
+
else
|
82
|
+
record.each_with_index do |value, nth_column|
|
83
|
+
columns[nth_column] << value
|
84
|
+
end
|
85
|
+
end
|
86
|
+
columns.each do |column|
|
87
|
+
column << nil if column.size != (nth_record + 1)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
columns.each_with_index do |column, i|
|
91
|
+
self[i].append(*column)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# @since 0.12.0
|
96
|
+
def append_columns(columns)
|
97
|
+
columns.each do |name, values|
|
98
|
+
self[name].append(*values)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
private
|
103
|
+
def resolve_name(name)
|
104
|
+
@name_to_index[name.to_s]
|
105
|
+
end
|
106
|
+
|
107
|
+
# TODO: Make public with good name. Is column_builders good enough?
|
108
|
+
# builders? sub_builders?
|
109
|
+
def column_builders
|
110
|
+
@column_builders ||= n_fields.times.collect do |i|
|
111
|
+
get_field(i)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
data/lib/arrow/record-batch.rb
CHANGED
@@ -22,6 +22,22 @@ module Arrow
|
|
22
22
|
include RecordContainable
|
23
23
|
include Enumerable
|
24
24
|
|
25
|
+
class << self
|
26
|
+
def new(*args)
|
27
|
+
n_args = args.size
|
28
|
+
case n_args
|
29
|
+
when 2
|
30
|
+
schema, data = args
|
31
|
+
RecordBatchBuilder.build(schema, data)
|
32
|
+
when 3
|
33
|
+
super
|
34
|
+
else
|
35
|
+
message = "wrong number of arguments (given #{n_args}, expected 2..3)"
|
36
|
+
raise ArgumentError, message
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
25
41
|
alias_method :each, :each_record
|
26
42
|
|
27
43
|
alias_method :columns_raw, :columns
|
@@ -29,6 +45,15 @@ module Arrow
|
|
29
45
|
@columns ||= columns_raw
|
30
46
|
end
|
31
47
|
|
48
|
+
# Converts the record batch to {Arrow::Table}.
|
49
|
+
#
|
50
|
+
# @return [Arrow::Table]
|
51
|
+
#
|
52
|
+
# @since 0.12.0
|
53
|
+
def to_table
|
54
|
+
Table.new(schema, [self])
|
55
|
+
end
|
56
|
+
|
32
57
|
def respond_to_missing?(name, include_private)
|
33
58
|
return true if find_column(name)
|
34
59
|
super
|