red-arrow 0.15.1 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +28 -16
- data/ext/arrow/converters.hpp +63 -33
- data/ext/arrow/raw-records.cpp +2 -1
- data/ext/arrow/values.cpp +2 -1
- data/lib/arrow/array-builder.rb +101 -52
- data/lib/arrow/array.rb +28 -10
- data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
- data/lib/arrow/chunked-array.rb +2 -0
- data/lib/arrow/csv-loader.rb +5 -0
- data/lib/arrow/csv-read-options.rb +18 -0
- data/lib/arrow/data-type.rb +35 -2
- data/lib/arrow/decimal128-array-builder.rb +0 -2
- data/lib/arrow/dictionary-array.rb +24 -0
- data/lib/arrow/field.rb +1 -1
- data/lib/arrow/generic-filterable.rb +43 -0
- data/lib/arrow/generic-takeable.rb +38 -0
- data/lib/arrow/list-data-type.rb +58 -8
- data/lib/arrow/loader.rb +12 -1
- data/lib/arrow/null-array-builder.rb +1 -1
- data/lib/arrow/null-array.rb +24 -0
- data/lib/arrow/raw-table-converter.rb +47 -0
- data/lib/arrow/record-batch-iterator.rb +22 -0
- data/lib/arrow/record-batch.rb +8 -3
- data/lib/arrow/schema.rb +5 -2
- data/lib/arrow/struct-array-builder.rb +13 -7
- data/lib/arrow/struct-data-type.rb +0 -2
- data/lib/arrow/table-loader.rb +29 -6
- data/lib/arrow/table-saver.rb +37 -13
- data/lib/arrow/table.rb +20 -73
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +3 -1
- data/test/helper.rb +1 -0
- data/test/helper/omittable.rb +36 -0
- data/test/raw-records/test-dense-union-array.rb +1 -34
- data/test/raw-records/test-sparse-union-array.rb +1 -33
- data/test/run-test.rb +14 -3
- data/test/test-array-builder.rb +17 -0
- data/test/test-array.rb +104 -0
- data/test/test-buffer.rb +11 -0
- data/test/test-chunked-array.rb +96 -0
- data/test/test-csv-loader.rb +2 -2
- data/test/test-data-type.rb +11 -0
- data/test/test-dense-union-data-type.rb +2 -2
- data/test/test-dictionary-array.rb +41 -0
- data/test/test-feather.rb +21 -6
- data/test/test-list-data-type.rb +27 -1
- data/test/test-null-array.rb +23 -0
- data/test/test-record-batch-iterator.rb +37 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +16 -0
- data/test/test-slicer.rb +74 -30
- data/test/test-sparse-union-data-type.rb +2 -2
- data/test/test-struct-array-builder.rb +8 -4
- data/test/test-table.rb +153 -14
- data/test/test-timestamp-array.rb +19 -0
- data/test/values/test-dense-union-array.rb +1 -34
- data/test/values/test-sparse-union-array.rb +1 -33
- metadata +22 -8
data/lib/arrow/array.rb
CHANGED
@@ -18,20 +18,21 @@
|
|
18
18
|
module Arrow
|
19
19
|
class Array
|
20
20
|
include Enumerable
|
21
|
+
include GenericFilterable
|
22
|
+
include GenericTakeable
|
21
23
|
|
22
24
|
class << self
|
23
25
|
def new(*args)
|
26
|
+
_builder_class = builder_class
|
27
|
+
return super if _builder_class.nil?
|
28
|
+
return super unless _builder_class.buildable?(args)
|
29
|
+
_builder_class.build(*args)
|
30
|
+
end
|
31
|
+
|
32
|
+
def builder_class
|
24
33
|
builder_class_name = "#{name}Builder"
|
25
|
-
|
26
|
-
|
27
|
-
if builder_class.buildable?(args)
|
28
|
-
builder_class.build(*args)
|
29
|
-
else
|
30
|
-
super
|
31
|
-
end
|
32
|
-
else
|
33
|
-
super
|
34
|
-
end
|
34
|
+
return nil unless const_defined?(builder_class_name)
|
35
|
+
const_get(builder_class_name)
|
35
36
|
end
|
36
37
|
end
|
37
38
|
|
@@ -82,5 +83,22 @@ module Arrow
|
|
82
83
|
def to_a
|
83
84
|
values
|
84
85
|
end
|
86
|
+
|
87
|
+
alias_method :is_in_raw, :is_in
|
88
|
+
def is_in(values)
|
89
|
+
case values
|
90
|
+
when ::Array
|
91
|
+
if self.class.builder_class.buildable?([values])
|
92
|
+
values = self.class.new(values)
|
93
|
+
else
|
94
|
+
values = self.class.new(value_data_type, values)
|
95
|
+
end
|
96
|
+
is_in_raw(values)
|
97
|
+
when ChunkedArray
|
98
|
+
is_in_chunked_array(values)
|
99
|
+
else
|
100
|
+
is_in_raw(values)
|
101
|
+
end
|
102
|
+
end
|
85
103
|
end
|
86
104
|
end
|
@@ -16,21 +16,13 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
module Arrow
|
19
|
-
class BinaryArrayBuilder
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
append_null
|
27
|
-
end
|
28
|
-
end
|
29
|
-
else
|
30
|
-
values.each do |value|
|
31
|
-
append_value(value)
|
32
|
-
end
|
33
|
-
end
|
19
|
+
class Buffer
|
20
|
+
alias_method :initialize_raw, :initialize
|
21
|
+
private :initialize_raw
|
22
|
+
|
23
|
+
def initialize(data)
|
24
|
+
@data = data
|
25
|
+
initialize_raw(data)
|
34
26
|
end
|
35
27
|
end
|
36
28
|
end
|
data/lib/arrow/chunked-array.rb
CHANGED
data/lib/arrow/csv-loader.rb
CHANGED
@@ -30,6 +30,9 @@ module Arrow
|
|
30
30
|
def initialize(path_or_data, **options)
|
31
31
|
@path_or_data = path_or_data
|
32
32
|
@options = options
|
33
|
+
if @options.key?(:delimiter)
|
34
|
+
@options[:col_sep] = @options.delete(:delimiter)
|
35
|
+
end
|
33
36
|
@compression = @options.delete(:compression)
|
34
37
|
end
|
35
38
|
|
@@ -113,6 +116,8 @@ module Arrow
|
|
113
116
|
options.add_schema(value)
|
114
117
|
when :encoding
|
115
118
|
# process encoding on opening input
|
119
|
+
when :col_sep
|
120
|
+
options.delimiter = value
|
116
121
|
else
|
117
122
|
setter = "#{key}="
|
118
123
|
if options.respond_to?(setter)
|
@@ -21,5 +21,23 @@ module Arrow
|
|
21
21
|
def add_column_type(name, type)
|
22
22
|
add_column_type_raw(name, DataType.resolve(type))
|
23
23
|
end
|
24
|
+
|
25
|
+
alias_method :delimiter_raw, :delimiter
|
26
|
+
def delimiter
|
27
|
+
delimiter_raw.chr
|
28
|
+
end
|
29
|
+
|
30
|
+
alias_method :delimiter_raw=, :delimiter=
|
31
|
+
def delimiter=(delimiter)
|
32
|
+
case delimiter
|
33
|
+
when String
|
34
|
+
if delimiter.bytesize != 1
|
35
|
+
message = "delimiter must be 1 byte character: #{delimiter.inspect}"
|
36
|
+
raise ArgumentError, message
|
37
|
+
end
|
38
|
+
delimiter = delimiter.ord
|
39
|
+
end
|
40
|
+
self.delimiter_raw = delimiter
|
41
|
+
end
|
24
42
|
end
|
25
43
|
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -121,6 +121,26 @@ module Arrow
|
|
121
121
|
end
|
122
122
|
end
|
123
123
|
|
124
|
+
def sub_types
|
125
|
+
types = {}
|
126
|
+
gtype.children.each do |child|
|
127
|
+
sub_type = child.to_class
|
128
|
+
types[sub_type] = true
|
129
|
+
sub_type.sub_types.each do |sub_sub_type|
|
130
|
+
types[sub_sub_type] = true
|
131
|
+
end
|
132
|
+
end
|
133
|
+
types.keys
|
134
|
+
end
|
135
|
+
|
136
|
+
def try_convert(value)
|
137
|
+
begin
|
138
|
+
resolve(value)
|
139
|
+
rescue ArgumentError
|
140
|
+
nil
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
124
144
|
private
|
125
145
|
def resolve_class(data_type)
|
126
146
|
components = data_type.to_s.split("_").collect(&:capitalize)
|
@@ -137,11 +157,24 @@ module Arrow
|
|
137
157
|
available_types << components.collect(&:downcase).join("_").to_sym
|
138
158
|
end
|
139
159
|
message =
|
140
|
-
"unknown type: #{data_type}: " +
|
160
|
+
"unknown type: <#{data_type.inspect}>: " +
|
141
161
|
"available types: #{available_types.inspect}"
|
142
162
|
raise ArgumentError, message
|
143
163
|
end
|
144
|
-
Arrow.const_get(data_type_class_name)
|
164
|
+
data_type_class = Arrow.const_get(data_type_class_name)
|
165
|
+
if data_type_class.gtype.abstract?
|
166
|
+
not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
|
167
|
+
not sub_type.gtype.abstract?
|
168
|
+
end
|
169
|
+
not_abstract_types = not_abstract_types.sort_by do |type|
|
170
|
+
type.name
|
171
|
+
end
|
172
|
+
message =
|
173
|
+
"abstract type: <#{data_type.inspect}>: " +
|
174
|
+
"use one of not abstract type: #{not_abstract_types.inspect}"
|
175
|
+
raise ArgumentError, message
|
176
|
+
end
|
177
|
+
data_type_class
|
145
178
|
end
|
146
179
|
end
|
147
180
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class DictionaryArray
|
20
|
+
def get_value(i)
|
21
|
+
dictionary[indices[i]]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/arrow/field.rb
CHANGED
@@ -59,7 +59,7 @@ module Arrow
|
|
59
59
|
# There is a shortcut for convenience. If field description
|
60
60
|
# doesn't have `:data_type`, all keys except `:name` are
|
61
61
|
# processes as data type description. For example, the
|
62
|
-
# following field
|
62
|
+
# following field descriptions are the same:
|
63
63
|
#
|
64
64
|
# ```ruby
|
65
65
|
# {name: "visible", data_type: {type: :boolean}}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
module GenericFilterable
|
20
|
+
class << self
|
21
|
+
def included(base)
|
22
|
+
base.__send__(:alias_method, :filter_raw, :filter)
|
23
|
+
base.__send__(:alias_method, :filter, :filter_generic)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def filter_generic(filter, options=nil)
|
28
|
+
case filter
|
29
|
+
when ::Array
|
30
|
+
filter_raw(BooleanArray.new(filter), options)
|
31
|
+
when ChunkedArray
|
32
|
+
if respond_to?(:filter_chunked_array)
|
33
|
+
filter_chunked_array(filter, options)
|
34
|
+
else
|
35
|
+
# TODO: Implement this in C++
|
36
|
+
filter_raw(filter.pack, options)
|
37
|
+
end
|
38
|
+
else
|
39
|
+
filter_raw(filter, options)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
module GenericTakeable
|
20
|
+
class << self
|
21
|
+
def included(base)
|
22
|
+
base.__send__(:alias_method, :take_raw, :take)
|
23
|
+
base.__send__(:alias_method, :take, :take_generic)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def take_generic(indices)
|
28
|
+
case indices
|
29
|
+
when ::Array
|
30
|
+
take_raw(IntArrayBuilder.build(indices))
|
31
|
+
when ChunkedArray
|
32
|
+
take_chunked_array(indices)
|
33
|
+
else
|
34
|
+
take_raw(indices)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/arrow/list-data-type.rb
CHANGED
@@ -53,16 +53,66 @@ module Arrow
|
|
53
53
|
#
|
54
54
|
# @example Create a list data type with field description
|
55
55
|
# Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
56
|
+
#
|
57
|
+
# @overload initialize(data_type)
|
58
|
+
#
|
59
|
+
# @param data_type [Arrow::DataType, String, Symbol,
|
60
|
+
# ::Array<String>, ::Array<Symbol>, Hash] The element data
|
61
|
+
# type of the list data type. A field is created with the
|
62
|
+
# default name `"item"` from the data type automatically.
|
63
|
+
#
|
64
|
+
# See {Arrow::DataType.resolve} how to specify data type.
|
65
|
+
#
|
66
|
+
# @example Create a list data type with {Arrow::DataType}
|
67
|
+
# Arrow::ListDataType.new(Arrow::BooleanDataType.new)
|
68
|
+
#
|
69
|
+
# @example Create a list data type with data type name as String
|
70
|
+
# Arrow::ListDataType.new("boolean")
|
71
|
+
#
|
72
|
+
# @example Create a list data type with data type name as Symbol
|
73
|
+
# Arrow::ListDataType.new(:boolean)
|
74
|
+
#
|
75
|
+
# @example Create a list data type with data type as Array
|
76
|
+
# Arrow::ListDataType.new([:time32, :milli])
|
77
|
+
def initialize(arg)
|
78
|
+
data_type = resolve_data_type(arg)
|
79
|
+
if data_type
|
80
|
+
field = Field.new(default_field_name, data_type)
|
81
|
+
else
|
82
|
+
field = resolve_field(arg)
|
64
83
|
end
|
65
84
|
initialize_raw(field)
|
66
85
|
end
|
86
|
+
|
87
|
+
private
|
88
|
+
def resolve_data_type(arg)
|
89
|
+
case arg
|
90
|
+
when DataType, String, Symbol, ::Array
|
91
|
+
DataType.resolve(arg)
|
92
|
+
when Hash
|
93
|
+
return nil if arg[:name]
|
94
|
+
return nil unless arg[:type]
|
95
|
+
DataType.resolve(arg)
|
96
|
+
else
|
97
|
+
nil
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def default_field_name
|
102
|
+
"item"
|
103
|
+
end
|
104
|
+
|
105
|
+
def resolve_field(arg)
|
106
|
+
if arg.is_a?(Hash) and arg.key?(:field)
|
107
|
+
description = arg
|
108
|
+
arg = description[:field]
|
109
|
+
end
|
110
|
+
if arg.is_a?(Hash)
|
111
|
+
field_description = arg
|
112
|
+
Field.new(field_description)
|
113
|
+
else
|
114
|
+
arg
|
115
|
+
end
|
116
|
+
end
|
67
117
|
end
|
68
118
|
end
|
data/lib/arrow/loader.rb
CHANGED
@@ -32,9 +32,16 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def require_libraries
|
35
|
+
require "arrow/column-containable"
|
36
|
+
require "arrow/field-containable"
|
37
|
+
require "arrow/generic-filterable"
|
38
|
+
require "arrow/generic-takeable"
|
39
|
+
require "arrow/record-containable"
|
40
|
+
|
35
41
|
require "arrow/array"
|
36
42
|
require "arrow/array-builder"
|
37
|
-
require "arrow/binary-array-builder"
|
43
|
+
require "arrow/bigdecimal-extension"
|
44
|
+
require "arrow/buffer"
|
38
45
|
require "arrow/chunked-array"
|
39
46
|
require "arrow/column"
|
40
47
|
require "arrow/compression-type"
|
@@ -50,17 +57,21 @@ module Arrow
|
|
50
57
|
require "arrow/decimal128-array-builder"
|
51
58
|
require "arrow/decimal128-data-type"
|
52
59
|
require "arrow/dense-union-data-type"
|
60
|
+
require "arrow/dictionary-array"
|
53
61
|
require "arrow/dictionary-data-type"
|
54
62
|
require "arrow/field"
|
55
63
|
require "arrow/file-output-stream"
|
64
|
+
require "arrow/group"
|
56
65
|
require "arrow/list-array-builder"
|
57
66
|
require "arrow/list-data-type"
|
67
|
+
require "arrow/null-array"
|
58
68
|
require "arrow/null-array-builder"
|
59
69
|
require "arrow/path-extension"
|
60
70
|
require "arrow/record"
|
61
71
|
require "arrow/record-batch"
|
62
72
|
require "arrow/record-batch-builder"
|
63
73
|
require "arrow/record-batch-file-reader"
|
74
|
+
require "arrow/record-batch-iterator"
|
64
75
|
require "arrow/record-batch-stream-reader"
|
65
76
|
require "arrow/rolling-window"
|
66
77
|
require "arrow/schema"
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class NullArray
|
20
|
+
def get_value(i)
|
21
|
+
nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|