red-arrow 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Rakefile +49 -4
- data/ext/arrow/arrow.cpp +43 -0
- data/ext/arrow/extconf.rb +52 -0
- data/ext/arrow/record-batch.cpp +756 -0
- data/ext/arrow/red-arrow.hpp +60 -0
- data/lib/arrow.rb +2 -1
- data/lib/arrow/array-builder.rb +4 -0
- data/lib/arrow/array.rb +11 -1
- data/lib/arrow/bigdecimal-extension.rb +24 -0
- data/lib/arrow/binary-array-builder.rb +36 -0
- data/lib/arrow/block-closable.rb +5 -1
- data/lib/arrow/csv-loader.rb +28 -6
- data/lib/arrow/data-type.rb +8 -4
- data/lib/arrow/decimal128-array-builder.rb +2 -2
- data/lib/arrow/decimal128.rb +42 -0
- data/lib/arrow/list-array-builder.rb +1 -1
- data/lib/arrow/loader.rb +8 -0
- data/lib/arrow/null-array-builder.rb +26 -0
- data/lib/arrow/record-batch-builder.rb +8 -9
- data/lib/arrow/struct-array-builder.rb +3 -3
- data/lib/arrow/struct-array.rb +15 -7
- data/lib/arrow/struct.rb +11 -0
- data/lib/arrow/table-loader.rb +14 -14
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +8 -4
- data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
- data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
- data/test/raw-records/record-batch/test-list-array.rb +498 -0
- data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
- data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
- data/test/raw-records/record-batch/test-struct-array.rb +426 -0
- data/test/run-test.rb +25 -2
- data/test/test-array.rb +38 -9
- data/test/test-bigdecimal.rb +23 -0
- data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
- data/test/test-chunked-array.rb +22 -0
- data/test/test-column.rb +24 -0
- data/test/test-csv-loader.rb +30 -0
- data/test/test-data-type.rb +25 -0
- data/test/test-decimal128.rb +64 -0
- data/test/test-field.rb +20 -0
- data/test/test-group.rb +2 -2
- data/test/test-record-batch-builder.rb +9 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-schema.rb +14 -0
- data/test/test-struct-array.rb +16 -3
- data/test/test-table.rb +14 -0
- data/test/test-tensor.rb +56 -0
- metadata +117 -47
@@ -0,0 +1,60 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#pragma once
|
21
|
+
|
22
|
+
#include <arrow/api.h>
|
23
|
+
|
24
|
+
#ifdef _WIN32
|
25
|
+
# define gmtime_r gmtime_r_ruby_win32
|
26
|
+
# define localtime_r localtime_r_ruby_win32
|
27
|
+
# include <ruby.h>
|
28
|
+
# undef gmtime_r
|
29
|
+
# undef localtime_r
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#include <arrow-glib/arrow-glib.hpp>
|
33
|
+
#include <rbgobject.h>
|
34
|
+
|
35
|
+
namespace red_arrow {
|
36
|
+
extern VALUE cDate;
|
37
|
+
|
38
|
+
extern ID id_BigDecimal;
|
39
|
+
extern ID id_jd;
|
40
|
+
extern ID id_to_datetime;
|
41
|
+
|
42
|
+
VALUE record_batch_raw_records(VALUE obj);
|
43
|
+
|
44
|
+
inline VALUE time_unit_to_scale(arrow::TimeUnit::type unit) {
|
45
|
+
switch (unit) {
|
46
|
+
case arrow::TimeUnit::SECOND:
|
47
|
+
return INT2FIX(1);
|
48
|
+
case arrow::TimeUnit::MILLI:
|
49
|
+
return INT2FIX(1000);
|
50
|
+
case arrow::TimeUnit::MICRO:
|
51
|
+
return INT2FIX(1000 * 1000);
|
52
|
+
case arrow::TimeUnit::NANO:
|
53
|
+
// NOTE: INT2FIX works for 1e+9 because: FIXNUM_MAX >= (1<<30) - 1 > 1e+9
|
54
|
+
return INT2FIX(1000 * 1000 * 1000);
|
55
|
+
default:
|
56
|
+
break; // NOT REACHED
|
57
|
+
}
|
58
|
+
return Qnil;
|
59
|
+
}
|
60
|
+
}
|
data/lib/arrow.rb
CHANGED
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -24,7 +24,7 @@ module Arrow
|
|
24
24
|
builder_class_name = "#{name}Builder"
|
25
25
|
if const_defined?(builder_class_name)
|
26
26
|
builder_class = const_get(builder_class_name)
|
27
|
-
if
|
27
|
+
if builder_class.buildable?(args)
|
28
28
|
builder_class.build(*args)
|
29
29
|
else
|
30
30
|
super
|
@@ -35,8 +35,18 @@ module Arrow
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
+
# @param i [Integer]
|
39
|
+
# The index of the value to be gotten.
|
40
|
+
#
|
41
|
+
# You can specify negative index like for `::Array#[]`.
|
42
|
+
#
|
43
|
+
# @return [Object, nil]
|
44
|
+
# The `i`-th value.
|
45
|
+
#
|
46
|
+
# `nil` for NULL value or out of range `i`.
|
38
47
|
def [](i)
|
39
48
|
i += length if i < 0
|
49
|
+
return nil if i < 0 or i >= length
|
40
50
|
if null?(i)
|
41
51
|
nil
|
42
52
|
else
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
require "bigdecimal"
|
19
|
+
|
20
|
+
class BigDecimal
|
21
|
+
def to_arrow
|
22
|
+
Arrow::Decimal128.new(to_s)
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class BinaryArrayBuilder
|
20
|
+
def append_values(values, is_valids=nil)
|
21
|
+
if is_valids
|
22
|
+
is_valids.each_with_index do |is_valid, i|
|
23
|
+
if is_valid
|
24
|
+
append_value(values[i])
|
25
|
+
else
|
26
|
+
append_null
|
27
|
+
end
|
28
|
+
end
|
29
|
+
else
|
30
|
+
values.each do |value|
|
31
|
+
append_value(value)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/lib/arrow/block-closable.rb
CHANGED
data/lib/arrow/csv-loader.rb
CHANGED
@@ -104,6 +104,8 @@ module Arrow
|
|
104
104
|
end
|
105
105
|
when :schema
|
106
106
|
options.add_schema(value)
|
107
|
+
when :encoding
|
108
|
+
# process encoding on opening input
|
107
109
|
else
|
108
110
|
setter = "#{key}="
|
109
111
|
if options.respond_to?(setter)
|
@@ -116,7 +118,7 @@ module Arrow
|
|
116
118
|
options
|
117
119
|
end
|
118
120
|
|
119
|
-
def
|
121
|
+
def open_decompress_input(raw_input)
|
120
122
|
if @compression
|
121
123
|
codec = Codec.new(@compression)
|
122
124
|
CompressedInputStream.open(codec, raw_input) do |input|
|
@@ -127,16 +129,36 @@ module Arrow
|
|
127
129
|
end
|
128
130
|
end
|
129
131
|
|
132
|
+
def open_encoding_convert_stream(raw_input, &block)
|
133
|
+
encoding = @options[:encoding]
|
134
|
+
if encoding
|
135
|
+
converter = Gio::CharsetConverter.new("UTF-8", encoding)
|
136
|
+
convert_input_stream =
|
137
|
+
Gio::ConverterInputStream.new(raw_input, converter)
|
138
|
+
GIOInputStream.open(convert_input_stream, &block)
|
139
|
+
else
|
140
|
+
yield(raw_input)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
def wrap_input(raw_input)
|
145
|
+
open_decompress_input(raw_input) do |input_|
|
146
|
+
open_encoding_convert_stream(input_) do |input__|
|
147
|
+
yield(input__)
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
130
152
|
def load_from_path(path)
|
131
153
|
options = reader_options
|
132
154
|
if options
|
133
155
|
begin
|
134
|
-
MemoryMappedInputStream.open(path
|
135
|
-
|
156
|
+
MemoryMappedInputStream.open(path) do |raw_input|
|
157
|
+
wrap_input(raw_input) do |input|
|
136
158
|
return CSVReader.new(input, options).read
|
137
159
|
end
|
138
160
|
end
|
139
|
-
rescue Arrow::Error::Invalid
|
161
|
+
rescue Arrow::Error::Invalid, Gio::Error
|
140
162
|
end
|
141
163
|
end
|
142
164
|
|
@@ -151,11 +173,11 @@ module Arrow
|
|
151
173
|
if options
|
152
174
|
begin
|
153
175
|
BufferInputStream.open(Buffer.new(data)) do |raw_input|
|
154
|
-
|
176
|
+
wrap_input(raw_input) do |input|
|
155
177
|
return CSVReader.new(input, options).read
|
156
178
|
end
|
157
179
|
end
|
158
|
-
rescue Arrow::Error::Invalid
|
180
|
+
rescue Arrow::Error::Invalid, Gio::Error
|
159
181
|
end
|
160
182
|
end
|
161
183
|
|
data/lib/arrow/data-type.rb
CHANGED
@@ -114,14 +114,18 @@ module Arrow
|
|
114
114
|
|
115
115
|
private
|
116
116
|
def resolve_class(data_type)
|
117
|
-
|
117
|
+
components = data_type.to_s.split("_").collect(&:capitalize)
|
118
|
+
data_type_name = components.join.gsub(/\AUint/, "UInt")
|
118
119
|
data_type_class_name = "#{data_type_name}DataType"
|
119
120
|
unless Arrow.const_defined?(data_type_class_name)
|
120
121
|
available_types = []
|
121
122
|
Arrow.constants.each do |name|
|
122
|
-
|
123
|
-
|
124
|
-
|
123
|
+
name = name.to_s
|
124
|
+
next if name == "DataType"
|
125
|
+
next unless name.end_with?("DataType")
|
126
|
+
name = name.gsub(/DataType\z/, "")
|
127
|
+
components = name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
|
128
|
+
available_types << components.collect(&:downcase).join("_").to_sym
|
125
129
|
end
|
126
130
|
message =
|
127
131
|
"unknown type: #{data_type.inspect}: " +
|
@@ -15,7 +15,7 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
require "bigdecimal"
|
18
|
+
require "arrow/bigdecimal-extension"
|
19
19
|
|
20
20
|
module Arrow
|
21
21
|
class Decimal128ArrayBuilder
|
@@ -36,7 +36,7 @@ module Arrow
|
|
36
36
|
when Float
|
37
37
|
value = Decimal128.new(value.to_s)
|
38
38
|
when BigDecimal
|
39
|
-
value =
|
39
|
+
value = value.to_arrow
|
40
40
|
end
|
41
41
|
append_value_raw(value)
|
42
42
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Decimal128
|
20
|
+
alias_method :to_s_raw, :to_s
|
21
|
+
|
22
|
+
# @overload to_s
|
23
|
+
#
|
24
|
+
# @return [String]
|
25
|
+
# The string representation of the decimal.
|
26
|
+
#
|
27
|
+
# @overload to_s(scale)
|
28
|
+
#
|
29
|
+
# @param scale [Integer] The scale of the decimal.
|
30
|
+
# @return [String]
|
31
|
+
# The string representation of the decimal including the scale.
|
32
|
+
#
|
33
|
+
# @since 0.13.0
|
34
|
+
def to_s(scale=nil)
|
35
|
+
if scale
|
36
|
+
to_string_scale(scale)
|
37
|
+
else
|
38
|
+
to_s_raw
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -56,7 +56,7 @@ module Arrow
|
|
56
56
|
when ::Array
|
57
57
|
append_value_raw
|
58
58
|
@value_builder ||= value_builder
|
59
|
-
@value_builder.
|
59
|
+
@value_builder.append(*value)
|
60
60
|
else
|
61
61
|
message = "list value must be nil or Array: #{value.inspect}"
|
62
62
|
raise ArgumentError, message
|
data/lib/arrow/loader.rb
CHANGED
@@ -28,11 +28,13 @@ module Arrow
|
|
28
28
|
private
|
29
29
|
def post_load(repository, namespace)
|
30
30
|
require_libraries
|
31
|
+
require_extension_library
|
31
32
|
end
|
32
33
|
|
33
34
|
def require_libraries
|
34
35
|
require "arrow/array"
|
35
36
|
require "arrow/array-builder"
|
37
|
+
require "arrow/binary-array-builder"
|
36
38
|
require "arrow/chunked-array"
|
37
39
|
require "arrow/column"
|
38
40
|
require "arrow/compression-type"
|
@@ -43,6 +45,7 @@ module Arrow
|
|
43
45
|
require "arrow/date32-array-builder"
|
44
46
|
require "arrow/date64-array"
|
45
47
|
require "arrow/date64-array-builder"
|
48
|
+
require "arrow/decimal128"
|
46
49
|
require "arrow/decimal128-array-builder"
|
47
50
|
require "arrow/decimal128-data-type"
|
48
51
|
require "arrow/dense-union-data-type"
|
@@ -51,6 +54,7 @@ module Arrow
|
|
51
54
|
require "arrow/file-output-stream"
|
52
55
|
require "arrow/list-array-builder"
|
53
56
|
require "arrow/list-data-type"
|
57
|
+
require "arrow/null-array-builder"
|
54
58
|
require "arrow/path-extension"
|
55
59
|
require "arrow/record"
|
56
60
|
require "arrow/record-batch"
|
@@ -79,6 +83,10 @@ module Arrow
|
|
79
83
|
require "arrow/writable"
|
80
84
|
end
|
81
85
|
|
86
|
+
def require_extension_library
|
87
|
+
require "arrow.so"
|
88
|
+
end
|
89
|
+
|
82
90
|
def load_object_info(info)
|
83
91
|
super
|
84
92
|
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class NullArrayBuilder
|
20
|
+
class << self
|
21
|
+
def buildable?(args)
|
22
|
+
super and args.collect(&:class) != [Integer]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -65,7 +65,7 @@ module Arrow
|
|
65
65
|
|
66
66
|
# @since 0.12.0
|
67
67
|
def append_records(records)
|
68
|
-
n =
|
68
|
+
n = n_columns
|
69
69
|
columns = n.times.collect do
|
70
70
|
[]
|
71
71
|
end
|
@@ -99,17 +99,16 @@ module Arrow
|
|
99
99
|
end
|
100
100
|
end
|
101
101
|
|
102
|
+
# @since 0.13.0
|
103
|
+
def column_builders
|
104
|
+
@column_builders ||= n_columns.times.collect do |i|
|
105
|
+
get_column_builder(i)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
102
109
|
private
|
103
110
|
def resolve_name(name)
|
104
111
|
@name_to_index[name.to_s]
|
105
112
|
end
|
106
|
-
|
107
|
-
# TODO: Make public with good name. Is column_builders good enough?
|
108
|
-
# builders? sub_builders?
|
109
|
-
def column_builders
|
110
|
-
@column_builders ||= n_fields.times.collect do |i|
|
111
|
-
get_field(i)
|
112
|
-
end
|
113
|
-
end
|
114
113
|
end
|
115
114
|
end
|