red-arrow 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Rakefile +49 -4
- data/ext/arrow/arrow.cpp +43 -0
- data/ext/arrow/extconf.rb +52 -0
- data/ext/arrow/record-batch.cpp +756 -0
- data/ext/arrow/red-arrow.hpp +60 -0
- data/lib/arrow.rb +2 -1
- data/lib/arrow/array-builder.rb +4 -0
- data/lib/arrow/array.rb +11 -1
- data/lib/arrow/bigdecimal-extension.rb +24 -0
- data/lib/arrow/binary-array-builder.rb +36 -0
- data/lib/arrow/block-closable.rb +5 -1
- data/lib/arrow/csv-loader.rb +28 -6
- data/lib/arrow/data-type.rb +8 -4
- data/lib/arrow/decimal128-array-builder.rb +2 -2
- data/lib/arrow/decimal128.rb +42 -0
- data/lib/arrow/list-array-builder.rb +1 -1
- data/lib/arrow/loader.rb +8 -0
- data/lib/arrow/null-array-builder.rb +26 -0
- data/lib/arrow/record-batch-builder.rb +8 -9
- data/lib/arrow/struct-array-builder.rb +3 -3
- data/lib/arrow/struct-array.rb +15 -7
- data/lib/arrow/struct.rb +11 -0
- data/lib/arrow/table-loader.rb +14 -14
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +8 -4
- data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
- data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
- data/test/raw-records/record-batch/test-list-array.rb +498 -0
- data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
- data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
- data/test/raw-records/record-batch/test-struct-array.rb +426 -0
- data/test/run-test.rb +25 -2
- data/test/test-array.rb +38 -9
- data/test/test-bigdecimal.rb +23 -0
- data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
- data/test/test-chunked-array.rb +22 -0
- data/test/test-column.rb +24 -0
- data/test/test-csv-loader.rb +30 -0
- data/test/test-data-type.rb +25 -0
- data/test/test-decimal128.rb +64 -0
- data/test/test-field.rb +20 -0
- data/test/test-group.rb +2 -2
- data/test/test-record-batch-builder.rb +9 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +14 -0
- data/test/test-struct-array.rb +16 -3
- data/test/test-table.rb +14 -0
- data/test/test-tensor.rb +56 -0
- metadata +117 -47
@@ -0,0 +1,60 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#pragma once
|
21
|
+
|
22
|
+
#include <arrow/api.h>
|
23
|
+
|
24
|
+
#ifdef _WIN32
|
25
|
+
# define gmtime_r gmtime_r_ruby_win32
|
26
|
+
# define localtime_r localtime_r_ruby_win32
|
27
|
+
# include <ruby.h>
|
28
|
+
# undef gmtime_r
|
29
|
+
# undef localtime_r
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#include <arrow-glib/arrow-glib.hpp>
|
33
|
+
#include <rbgobject.h>
|
34
|
+
|
35
|
+
namespace red_arrow {
|
36
|
+
extern VALUE cDate;
|
37
|
+
|
38
|
+
extern ID id_BigDecimal;
|
39
|
+
extern ID id_jd;
|
40
|
+
extern ID id_to_datetime;
|
41
|
+
|
42
|
+
VALUE record_batch_raw_records(VALUE obj);
|
43
|
+
|
44
|
+
inline VALUE time_unit_to_scale(arrow::TimeUnit::type unit) {
|
45
|
+
switch (unit) {
|
46
|
+
case arrow::TimeUnit::SECOND:
|
47
|
+
return INT2FIX(1);
|
48
|
+
case arrow::TimeUnit::MILLI:
|
49
|
+
return INT2FIX(1000);
|
50
|
+
case arrow::TimeUnit::MICRO:
|
51
|
+
return INT2FIX(1000 * 1000);
|
52
|
+
case arrow::TimeUnit::NANO:
|
53
|
+
// NOTE: INT2FIX works for 1e+9 because: FIXNUM_MAX >= (1<<30) - 1 > 1e+9
|
54
|
+
return INT2FIX(1000 * 1000 * 1000);
|
55
|
+
default:
|
56
|
+
break; // NOT REACHED
|
57
|
+
}
|
58
|
+
return Qnil;
|
59
|
+
}
|
60
|
+
}
|
data/lib/arrow.rb
CHANGED
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -24,7 +24,7 @@ module Arrow
|
|
24
24
|
builder_class_name = "#{name}Builder"
|
25
25
|
if const_defined?(builder_class_name)
|
26
26
|
builder_class = const_get(builder_class_name)
|
27
|
-
if
|
27
|
+
if builder_class.buildable?(args)
|
28
28
|
builder_class.build(*args)
|
29
29
|
else
|
30
30
|
super
|
@@ -35,8 +35,18 @@ module Arrow
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
+
# @param i [Integer]
|
39
|
+
# The index of the value to be gotten.
|
40
|
+
#
|
41
|
+
# You can specify negative index like for `::Array#[]`.
|
42
|
+
#
|
43
|
+
# @return [Object, nil]
|
44
|
+
# The `i`-th value.
|
45
|
+
#
|
46
|
+
# `nil` for NULL value or out of range `i`.
|
38
47
|
def [](i)
|
39
48
|
i += length if i < 0
|
49
|
+
return nil if i < 0 or i >= length
|
40
50
|
if null?(i)
|
41
51
|
nil
|
42
52
|
else
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
require "bigdecimal"
|
19
|
+
|
20
|
+
class BigDecimal
|
21
|
+
def to_arrow
|
22
|
+
Arrow::Decimal128.new(to_s)
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class BinaryArrayBuilder
|
20
|
+
def append_values(values, is_valids=nil)
|
21
|
+
if is_valids
|
22
|
+
is_valids.each_with_index do |is_valid, i|
|
23
|
+
if is_valid
|
24
|
+
append_value(values[i])
|
25
|
+
else
|
26
|
+
append_null
|
27
|
+
end
|
28
|
+
end
|
29
|
+
else
|
30
|
+
values.each do |value|
|
31
|
+
append_value(value)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/arrow/block-closable.rb
CHANGED
data/lib/arrow/csv-loader.rb
CHANGED
@@ -104,6 +104,8 @@ module Arrow
|
|
104
104
|
end
|
105
105
|
when :schema
|
106
106
|
options.add_schema(value)
|
107
|
+
when :encoding
|
108
|
+
# process encoding on opening input
|
107
109
|
else
|
108
110
|
setter = "#{key}="
|
109
111
|
if options.respond_to?(setter)
|
@@ -116,7 +118,7 @@ module Arrow
|
|
116
118
|
options
|
117
119
|
end
|
118
120
|
|
119
|
-
def
|
121
|
+
def open_decompress_input(raw_input)
|
120
122
|
if @compression
|
121
123
|
codec = Codec.new(@compression)
|
122
124
|
CompressedInputStream.open(codec, raw_input) do |input|
|
@@ -127,16 +129,36 @@ module Arrow
|
|
127
129
|
end
|
128
130
|
end
|
129
131
|
|
132
|
+
def open_encoding_convert_stream(raw_input, &block)
|
133
|
+
encoding = @options[:encoding]
|
134
|
+
if encoding
|
135
|
+
converter = Gio::CharsetConverter.new("UTF-8", encoding)
|
136
|
+
convert_input_stream =
|
137
|
+
Gio::ConverterInputStream.new(raw_input, converter)
|
138
|
+
GIOInputStream.open(convert_input_stream, &block)
|
139
|
+
else
|
140
|
+
yield(raw_input)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def wrap_input(raw_input)
|
145
|
+
open_decompress_input(raw_input) do |input_|
|
146
|
+
open_encoding_convert_stream(input_) do |input__|
|
147
|
+
yield(input__)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
130
152
|
def load_from_path(path)
|
131
153
|
options = reader_options
|
132
154
|
if options
|
133
155
|
begin
|
134
|
-
MemoryMappedInputStream.open(path
|
135
|
-
|
156
|
+
MemoryMappedInputStream.open(path) do |raw_input|
|
157
|
+
wrap_input(raw_input) do |input|
|
136
158
|
return CSVReader.new(input, options).read
|
137
159
|
end
|
138
160
|
end
|
139
|
-
rescue Arrow::Error::Invalid
|
161
|
+
rescue Arrow::Error::Invalid, Gio::Error
|
140
162
|
end
|
141
163
|
end
|
142
164
|
|
@@ -151,11 +173,11 @@ module Arrow
|
|
151
173
|
if options
|
152
174
|
begin
|
153
175
|
BufferInputStream.open(Buffer.new(data)) do |raw_input|
|
154
|
-
|
176
|
+
wrap_input(raw_input) do |input|
|
155
177
|
return CSVReader.new(input, options).read
|
156
178
|
end
|
157
179
|
end
|
158
|
-
rescue Arrow::Error::Invalid
|
180
|
+
rescue Arrow::Error::Invalid, Gio::Error
|
159
181
|
end
|
160
182
|
end
|
161
183
|
|
data/lib/arrow/data-type.rb
CHANGED
@@ -114,14 +114,18 @@ module Arrow
|
|
114
114
|
|
115
115
|
private
|
116
116
|
def resolve_class(data_type)
|
117
|
-
|
117
|
+
components = data_type.to_s.split("_").collect(&:capitalize)
|
118
|
+
data_type_name = components.join.gsub(/\AUint/, "UInt")
|
118
119
|
data_type_class_name = "#{data_type_name}DataType"
|
119
120
|
unless Arrow.const_defined?(data_type_class_name)
|
120
121
|
available_types = []
|
121
122
|
Arrow.constants.each do |name|
|
122
|
-
|
123
|
-
|
124
|
-
|
123
|
+
name = name.to_s
|
124
|
+
next if name == "DataType"
|
125
|
+
next unless name.end_with?("DataType")
|
126
|
+
name = name.gsub(/DataType\z/, "")
|
127
|
+
components = name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
|
128
|
+
available_types << components.collect(&:downcase).join("_").to_sym
|
125
129
|
end
|
126
130
|
message =
|
127
131
|
"unknown type: #{data_type.inspect}: " +
|
@@ -15,7 +15,7 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
require "bigdecimal"
|
18
|
+
require "arrow/bigdecimal-extension"
|
19
19
|
|
20
20
|
module Arrow
|
21
21
|
class Decimal128ArrayBuilder
|
@@ -36,7 +36,7 @@ module Arrow
|
|
36
36
|
when Float
|
37
37
|
value = Decimal128.new(value.to_s)
|
38
38
|
when BigDecimal
|
39
|
-
value =
|
39
|
+
value = value.to_arrow
|
40
40
|
end
|
41
41
|
append_value_raw(value)
|
42
42
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Decimal128
|
20
|
+
alias_method :to_s_raw, :to_s
|
21
|
+
|
22
|
+
# @overload to_s
|
23
|
+
#
|
24
|
+
# @return [String]
|
25
|
+
# The string representation of the decimal.
|
26
|
+
#
|
27
|
+
# @overload to_s(scale)
|
28
|
+
#
|
29
|
+
# @param scale [Integer] The scale of the decimal.
|
30
|
+
# @return [String]
|
31
|
+
# The string representation of the decimal including the scale.
|
32
|
+
#
|
33
|
+
# @since 0.13.0
|
34
|
+
def to_s(scale=nil)
|
35
|
+
if scale
|
36
|
+
to_string_scale(scale)
|
37
|
+
else
|
38
|
+
to_s_raw
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -56,7 +56,7 @@ module Arrow
|
|
56
56
|
when ::Array
|
57
57
|
append_value_raw
|
58
58
|
@value_builder ||= value_builder
|
59
|
-
@value_builder.
|
59
|
+
@value_builder.append(*value)
|
60
60
|
else
|
61
61
|
message = "list value must be nil or Array: #{value.inspect}"
|
62
62
|
raise ArgumentError, message
|
data/lib/arrow/loader.rb
CHANGED
@@ -28,11 +28,13 @@ module Arrow
|
|
28
28
|
private
|
29
29
|
def post_load(repository, namespace)
|
30
30
|
require_libraries
|
31
|
+
require_extension_library
|
31
32
|
end
|
32
33
|
|
33
34
|
def require_libraries
|
34
35
|
require "arrow/array"
|
35
36
|
require "arrow/array-builder"
|
37
|
+
require "arrow/binary-array-builder"
|
36
38
|
require "arrow/chunked-array"
|
37
39
|
require "arrow/column"
|
38
40
|
require "arrow/compression-type"
|
@@ -43,6 +45,7 @@ module Arrow
|
|
43
45
|
require "arrow/date32-array-builder"
|
44
46
|
require "arrow/date64-array"
|
45
47
|
require "arrow/date64-array-builder"
|
48
|
+
require "arrow/decimal128"
|
46
49
|
require "arrow/decimal128-array-builder"
|
47
50
|
require "arrow/decimal128-data-type"
|
48
51
|
require "arrow/dense-union-data-type"
|
@@ -51,6 +54,7 @@ module Arrow
|
|
51
54
|
require "arrow/file-output-stream"
|
52
55
|
require "arrow/list-array-builder"
|
53
56
|
require "arrow/list-data-type"
|
57
|
+
require "arrow/null-array-builder"
|
54
58
|
require "arrow/path-extension"
|
55
59
|
require "arrow/record"
|
56
60
|
require "arrow/record-batch"
|
@@ -79,6 +83,10 @@ module Arrow
|
|
79
83
|
require "arrow/writable"
|
80
84
|
end
|
81
85
|
|
86
|
+
def require_extension_library
|
87
|
+
require "arrow.so"
|
88
|
+
end
|
89
|
+
|
82
90
|
def load_object_info(info)
|
83
91
|
super
|
84
92
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class NullArrayBuilder
|
20
|
+
class << self
|
21
|
+
def buildable?(args)
|
22
|
+
super and args.collect(&:class) != [Integer]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -65,7 +65,7 @@ module Arrow
|
|
65
65
|
|
66
66
|
# @since 0.12.0
|
67
67
|
def append_records(records)
|
68
|
-
n =
|
68
|
+
n = n_columns
|
69
69
|
columns = n.times.collect do
|
70
70
|
[]
|
71
71
|
end
|
@@ -99,17 +99,16 @@ module Arrow
|
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
|
+
# @since 0.13.0
|
103
|
+
def column_builders
|
104
|
+
@column_builders ||= n_columns.times.collect do |i|
|
105
|
+
get_column_builder(i)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
102
109
|
private
|
103
110
|
def resolve_name(name)
|
104
111
|
@name_to_index[name.to_s]
|
105
112
|
end
|
106
|
-
|
107
|
-
# TODO: Make public with good name. Is column_builders good enough?
|
108
|
-
# builders? sub_builders?
|
109
|
-
def column_builders
|
110
|
-
@column_builders ||= n_fields.times.collect do |i|
|
111
|
-
get_field(i)
|
112
|
-
end
|
113
|
-
end
|
114
113
|
end
|
115
114
|
end
|