red-arrow 10.0.0 → 16.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/arrow.cpp +31 -0
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +16 -4
- data/ext/arrow/raw-records.cpp +155 -2
- data/ext/arrow/red-arrow.hpp +2 -0
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +6 -1
- data/lib/arrow/chunked-array.rb +35 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/expression.rb +6 -2
- data/lib/arrow/function.rb +0 -1
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +11 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sort-key.rb +3 -3
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +11 -5
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +180 -33
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/timestamp-parser.rb +33 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/each-raw-record/test-basic-arrays.rb +411 -0
- data/test/each-raw-record/test-dense-union-array.rb +566 -0
- data/test/each-raw-record/test-dictionary-array.rb +341 -0
- data/test/each-raw-record/test-list-array.rb +628 -0
- data/test/each-raw-record/test-map-array.rb +507 -0
- data/test/each-raw-record/test-multiple-columns.rb +72 -0
- data/test/each-raw-record/test-sparse-union-array.rb +528 -0
- data/test/each-raw-record/test-struct-array.rb +529 -0
- data/test/each-raw-record/test-table.rb +47 -0
- data/test/helper/omittable.rb +13 -0
- data/test/helper.rb +1 -0
- data/test/raw-records/test-basic-arrays.rb +11 -1
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-csv-loader.rb +39 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-expression.rb +11 -0
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-ractor.rb +34 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +356 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +62 -9
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class DenseUnionArray
    # Returns the value stored at the i-th slot of this dense union.
    #
    # The child ID recorded for slot `i` selects the child field, and
    # the value offset recorded for slot `i` is the index used inside
    # that child field.
    #
    # @param i [Integer] The index of the target slot.
    #
    # @return [Object] The element of the selected child field.
    def get_value(i)
      get_field(get_child_id(i))[get_value_offset(i)]
    end
  end
end
|
data/lib/arrow/expression.rb
CHANGED
@@ -31,10 +31,14 @@ module Arrow
|
|
31
31
|
else
|
32
32
|
return nil
|
33
33
|
end
|
34
|
+
options = nil
|
34
35
|
if arguments.last.is_a?(FunctionOptions)
|
35
36
|
options = arguments.pop
|
36
|
-
|
37
|
-
|
37
|
+
elsif arguments.last.is_a?(Hash)
|
38
|
+
function = Function.find(function_name)
|
39
|
+
if function
|
40
|
+
options = function.resolve_options(arguments.pop)
|
41
|
+
end
|
38
42
|
end
|
39
43
|
CallExpression.new(function_name, arguments, options)
|
40
44
|
else
|
data/lib/arrow/function.rb
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class HalfFloatArrayBuilder
    private

    # Normalizes a value into the form handed to the underlying builder.
    #
    # A Float is wrapped in {Arrow::HalfFloat} and converted to its
    # 16-bit pattern, an {Arrow::HalfFloat} is converted to its 16-bit
    # pattern directly, and anything else is passed through untouched.
    #
    # @param value [Float, Arrow::HalfFloat, Object] The value to append.
    #
    # @return [Object] The 16-bit pattern, or `value` itself.
    def convert_to_arrow_value(value)
      return HalfFloat.new(value).to_uint16 if value.is_a?(Float)
      return value.to_uint16 if value.is_a?(HalfFloat)

      value
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class HalfFloatArray
    # Returns the i-th element as a Ruby Float.
    #
    # The raw value at `i` is wrapped in {Arrow::HalfFloat} and then
    # converted with `to_f`.
    #
    # @param i [Integer] The index of the target element.
    #
    # @return [Float] The decoded value.
    def get_value(i)
      raw_value = get_raw_value(i)
      half_float = HalfFloat.new(raw_value)
      half_float.to_f
    end
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  # A 16-bit floating point value split into its three bit fields:
  #
  #     | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
  #
  # It converts between Ruby Float, the raw 16-bit unsigned integer
  # pattern and the (sign, exponent, fraction) triple.
  class HalfFloat
    # The largest finite magnitude that can be represented.
    MAX = 65504
    # The smallest (most negative) finite value that can be represented.
    MIN = -65504
    EXPONENT_N_BITS = 5
    # All-ones exponent: marks infinity (fraction == 0) or NaN (fraction != 0).
    EXPONENT_MASK = (2 ** EXPONENT_N_BITS) - 1
    EXPONENT_BIAS = 15
    FRACTION_N_BITS = 10
    FRACTION_MASK = (2 ** FRACTION_N_BITS) - 1
    FRACTION_DENOMINATOR = 2.0 ** FRACTION_N_BITS

    attr_reader :sign
    attr_reader :exponent
    attr_reader :fraction

    # @overload initialize(float)
    #   @param float [Float] The value to encode. Magnitudes larger than
    #     {MAX} become infinity.
    #
    # @overload initialize(uint16)
    #   @param uint16 [Integer] The raw 16-bit pattern to decode.
    #
    # @overload initialize(sign, exponent, fraction)
    #   @param sign [Integer] The sign bit (0 or 1).
    #   @param exponent [Integer] The biased exponent field.
    #   @param fraction [Integer] The fraction field.
    #
    # @raise [ArgumentError] If the number of arguments is neither 1 nor 3.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        if args[0].is_a?(Float)
          @sign, @exponent, @fraction = deconstruct_float(args[0])
        else
          @sign, @exponent, @fraction = deconstruct_uint16(args[0])
        end
      when 3
        @sign, @exponent, @fraction = *args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
    end

    # @return [Float] The value as a Ruby Float. An all-ones exponent
    #   yields NaN when the fraction is non-zero and a signed infinity
    #   otherwise.
    def to_f
      if @exponent == EXPONENT_MASK
        return Float::NAN unless @fraction.zero?
        return @sign.zero? ? Float::INFINITY : -Float::INFINITY
      end

      if @exponent.zero?
        # Zero and subnormals: there is no implicit leading 1 and the
        # exponent is pinned to the smallest normal exponent.
        implicit_fraction = 0
        unbiased_exponent = 1 - EXPONENT_BIAS
      else
        implicit_fraction = 1
        unbiased_exponent = @exponent - EXPONENT_BIAS
      end
      magnitude =
        (2.0 ** unbiased_exponent) *
        (implicit_fraction + @fraction / FRACTION_DENOMINATOR)
      @sign.zero? ? magnitude : -magnitude
    end

    # @return [Integer] The raw 16-bit pattern.
    def to_uint16
      # The fields occupy disjoint bit ranges, so combine them with OR.
      (@sign << (EXPONENT_N_BITS + FRACTION_N_BITS)) |
        (@exponent << FRACTION_N_BITS) |
        @fraction
    end

    # @return [String] The 16-bit pattern packed as a native-endian
    #   unsigned short (`"S"` pack directive).
    def pack
      [to_uint16].pack("S")
    end

    private
    # Splits a Float into [sign, exponent, fraction].
    def deconstruct_float(float)
      # NaN: all-ones exponent with the top fraction bit set.
      return [0, EXPONENT_MASK, 1 << (FRACTION_N_BITS - 1)] if float.nan?

      # 1.0 / -0.0 is -Infinity, which lets us keep the sign of -0.0.
      is_negative =
        float.negative? || (float.zero? && (1.0 / float).negative?)
      sign = is_negative ? 0b1 : 0b0

      # Infinities and out-of-range values are encoded as infinity.
      return [sign, EXPONENT_MASK, 0] if float > MAX || float < MIN

      magnitude = float.abs
      return [sign, 0, 0] if magnitude.zero?

      min_exponent = 1 - EXPONENT_BIAS
      # Math.frexp gives magnitude == m * 2**e with 0.5 <= m < 1, so
      # the normalized form 1.f * 2**x has x == e - 1. This puts exact
      # powers of two in the binade they start, with fraction 0.
      _, binary_exponent = Math.frexp(magnitude)
      unbiased_exponent = binary_exponent - 1
      if unbiased_exponent < min_exponent
        # Subnormal: exponent field 0, no implicit leading 1.
        exponent = 0
        fraction =
          (magnitude / (2.0 ** min_exponent) * FRACTION_DENOMINATOR).round
      else
        exponent = unbiased_exponent + EXPONENT_BIAS
        fraction =
          ((magnitude / (2.0 ** unbiased_exponent) - 1) *
           FRACTION_DENOMINATOR).round
      end
      if fraction > FRACTION_MASK
        # Rounding carried out of the 10-bit fraction: move to the
        # next binade instead of overflowing into the exponent bits.
        exponent += 1
        fraction = 0
      end
      [sign, exponent, fraction]
    end

    # Splits a raw 16-bit pattern into [sign, exponent, fraction].
    def deconstruct_uint16(uint16)
      # | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
      sign = (uint16 >> (EXPONENT_N_BITS + FRACTION_N_BITS))
      exponent = ((uint16 >> FRACTION_N_BITS) & EXPONENT_MASK)
      fraction = (uint16 & FRACTION_MASK)
      [sign, exponent, fraction]
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  # Mixin that lets an object hold a reference to an input object in
  # its `@input` instance variable and hand the same reference to
  # another object.
  module InputReferable
    # Stores `input` in `@input`.
    #
    # @param input [Object] The input to keep a reference to.
    #
    # @return [Object] The given `input`.
    def refer_input(input)
      @input = input
    end

    # Passes the stored input to `other` via `other.refer_input`.
    # Does nothing and returns `nil` when `@input` was never set.
    #
    # @param other [#refer_input] The object that receives the input.
    def share_input(other)
      other.refer_input(@input) if instance_variable_defined?(:@input)
    end
  end
end
|
data/lib/arrow/loader.rb
CHANGED
@@ -39,6 +39,7 @@ module Arrow
|
|
39
39
|
require "arrow/field-containable"
|
40
40
|
require "arrow/generic-filterable"
|
41
41
|
require "arrow/generic-takeable"
|
42
|
+
require "arrow/input-referable"
|
42
43
|
require "arrow/record-containable"
|
43
44
|
require "arrow/symbol-values-appendable"
|
44
45
|
|
@@ -69,6 +70,8 @@ module Arrow
|
|
69
70
|
require "arrow/decimal256-array"
|
70
71
|
require "arrow/decimal256-array-builder"
|
71
72
|
require "arrow/decimal256-data-type"
|
73
|
+
require "arrow/dense-union-array"
|
74
|
+
require "arrow/dense-union-array-builder"
|
72
75
|
require "arrow/dense-union-data-type"
|
73
76
|
require "arrow/dictionary-array"
|
74
77
|
require "arrow/dictionary-data-type"
|
@@ -81,6 +84,9 @@ module Arrow
|
|
81
84
|
require "arrow/fixed-size-binary-array-builder"
|
82
85
|
require "arrow/function"
|
83
86
|
require "arrow/group"
|
87
|
+
require "arrow/half-float"
|
88
|
+
require "arrow/half-float-array"
|
89
|
+
require "arrow/half-float-array-builder"
|
84
90
|
require "arrow/list-array-builder"
|
85
91
|
require "arrow/list-data-type"
|
86
92
|
require "arrow/map-array"
|
@@ -105,6 +111,8 @@ module Arrow
|
|
105
111
|
require "arrow/sort-key"
|
106
112
|
require "arrow/sort-options"
|
107
113
|
require "arrow/source-node-options"
|
114
|
+
require "arrow/sparse-union-array"
|
115
|
+
require "arrow/sparse-union-array-builder"
|
108
116
|
require "arrow/sparse-union-data-type"
|
109
117
|
require "arrow/string-dictionary-array-builder"
|
110
118
|
require "arrow/string-array-builder"
|
@@ -130,6 +138,8 @@ module Arrow
|
|
130
138
|
require "arrow/timestamp-array"
|
131
139
|
require "arrow/timestamp-array-builder"
|
132
140
|
require "arrow/timestamp-data-type"
|
141
|
+
require "arrow/timestamp-parser"
|
142
|
+
require "arrow/union-array-builder"
|
133
143
|
require "arrow/writable"
|
134
144
|
end
|
135
145
|
|
@@ -196,6 +206,7 @@ module Arrow
|
|
196
206
|
"Arrow::Date64Array",
|
197
207
|
"Arrow::Decimal128Array",
|
198
208
|
"Arrow::Decimal256Array",
|
209
|
+
"Arrow::HalfFloatArray",
|
199
210
|
"Arrow::Time32Array",
|
200
211
|
"Arrow::Time64Array",
|
201
212
|
"Arrow::TimestampArray"
|
@@ -35,14 +35,16 @@ module Arrow
|
|
35
35
|
fields = []
|
36
36
|
@values = []
|
37
37
|
@raw_table.each do |name, array|
|
38
|
-
if array.respond_to?(:
|
39
|
-
|
38
|
+
if array.respond_to?(:to_arrow_chunked_array)
|
39
|
+
chunked_array = array.to_arrow_chunked_array
|
40
|
+
elsif array.respond_to?(:to_arrow_array)
|
41
|
+
chunked_array = ChunkedArray.new([array.to_arrow_array])
|
40
42
|
else
|
41
43
|
array = array.to_ary if array.respond_to?(:to_ary)
|
42
|
-
|
44
|
+
chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
|
43
45
|
end
|
44
|
-
fields << Field.new(name.to_s,
|
45
|
-
@values <<
|
46
|
+
fields << Field.new(name.to_s, chunked_array.value_data_type)
|
47
|
+
@values << chunked_array
|
46
48
|
end
|
47
49
|
@schema = Schema.new(fields)
|
48
50
|
end
|
data/lib/arrow/record-batch.rb
CHANGED
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
|
|
19
19
|
|
20
20
|
module Arrow
|
21
21
|
class RecordBatch
|
22
|
+
include Enumerable
|
23
|
+
|
22
24
|
include ColumnContainable
|
25
|
+
include InputReferable
|
23
26
|
include RecordContainable
|
24
|
-
include Enumerable
|
25
27
|
|
26
28
|
class << self
|
27
29
|
def new(*args)
|
@@ -56,7 +58,9 @@ module Arrow
|
|
56
58
|
#
|
57
59
|
# @since 0.12.0
|
58
60
|
def to_table
|
59
|
-
Table.new(schema, [self])
|
61
|
+
table = Table.new(schema, [self])
|
62
|
+
share_input(table)
|
63
|
+
table
|
60
64
|
end
|
61
65
|
|
62
66
|
def respond_to_missing?(name, include_private)
|
data/lib/arrow/scalar.rb
CHANGED
@@ -17,6 +17,73 @@
|
|
17
17
|
|
18
18
|
module Arrow
|
19
19
|
class Scalar
|
20
|
+
class << self
  # Converts a plain Ruby value to a scalar chosen by the value's
  # class. Returns `nil` for values that have no mapping.
  #
  # @api private
  def try_convert(value)
    case value
    when self then value
    when true, false then BooleanScalar.new(value)
    when Symbol, String then StringScalar.new(value.to_s)
    when Integer then Int64Scalar.new(value)
    when Float then DoubleScalar.new(value)
    end
  end

  # Returns a suitable {Arrow::Scalar} for the given value.
  #
  # @overload resolve(scalar)
  #
  #   Returns the given scalar as is, so this method can be used as
  #   an {Arrow::Scalar} converter.
  #
  #   @param scalar [Arrow::Scalar] The scalar.
  #
  #   @return [Arrow::Scalar] The given scalar itself.
  #
  # @overload resolve(value)
  #
  #   Builds a scalar whose class is chosen from the class of
  #   `value`. For example, `true` gives {Arrow::BooleanScalar}.
  #
  #   @param value [Object] The value.
  #
  #   @return [Arrow::Scalar, nil] A suitable {Arrow::Scalar} for
  #     `value`, or `nil` when no scalar class is mapped to it.
  #
  # @overload resolve(value, data_type)
  #
  #   Builds a scalar of `data_type.scalar_class`. For example, `29`
  #   with {Arrow::Int32DataType} gives {Arrow::Int32Scalar}.
  #
  #   @param value [Object] The value. A scalar of another class is
  #     unwrapped with `value.value` first.
  #
  #   @param data_type [Arrow::DataType] The {Arrow::DataType} that
  #     decides the class of the returned scalar.
  #
  #   @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
  #
  # @since 12.0.0
  def resolve(value, data_type=nil)
    return try_convert(value) if data_type.nil?

    scalar_class = DataType.resolve(data_type).scalar_class
    if value.is_a?(Scalar)
      # Already the requested class: nothing to rebuild.
      return value if value.class == scalar_class
      value = value.value
    end
    scalar_class.new(value)
  end
end
|
86
|
+
|
20
87
|
# @param other [Arrow::Scalar] The scalar to be compared.
|
21
88
|
# @param options [Arrow::EqualOptions, Hash] (nil)
|
22
89
|
# The options to custom how to compare.
|
data/lib/arrow/slicer.rb
CHANGED
@@ -162,6 +162,40 @@ module Arrow
|
|
162
162
|
def reject(&block)
|
163
163
|
RejectCondition.new(@column, block)
|
164
164
|
end
|
165
|
+
|
166
|
+
# Builds a condition that runs the "ends_with" function on this
# column with `substring` as the pattern.
#
# @param substring [String] The pattern given to the function.
# @param ignore_case [Boolean] Passed to the match options as is.
def end_with?(substring, ignore_case: false)
  function_name = "ends_with"
  MatchSubstringFamilyCondition.new(function_name,
                                    @column,
                                    substring,
                                    ignore_case)
end
|
170
|
+
|
171
|
+
# Builds a condition that runs the "match_like" function on this
# column with `pattern` as the pattern.
#
# @param pattern [String] The pattern given to the function.
# @param ignore_case [Boolean] Passed to the match options as is.
def match_like?(pattern, ignore_case: false)
  function_name = "match_like"
  MatchSubstringFamilyCondition.new(function_name,
                                    @column,
                                    pattern,
                                    ignore_case)
end
|
175
|
+
|
176
|
+
# Builds a substring match condition on this column.
#
# A String pattern uses the "match_substring" function and
# `ignore_case` defaults to `false`. A Regexp pattern uses the
# "match_substring_regex" function with the regexp source as the
# pattern, and `ignore_case` defaults to the regexp's own
# case-insensitivity flag.
#
# @param pattern [String, Regexp] The pattern to look for.
# @param ignore_case [Boolean, nil] `nil` means "use the default".
#
# @raise [ArgumentError] If `pattern` is neither String nor Regexp.
def match_substring?(pattern, ignore_case: nil)
  unless pattern.is_a?(String) || pattern.is_a?(Regexp)
    message =
      "pattern must be either String or Regexp: #{pattern.inspect}"
    raise ArgumentError, message
  end

  if pattern.is_a?(Regexp)
    ignore_case = pattern.casefold? if ignore_case.nil?
    return MatchSubstringFamilyCondition.new("match_substring_regex",
                                             @column,
                                             pattern.source,
                                             ignore_case)
  end

  ignore_case = false if ignore_case.nil?
  MatchSubstringFamilyCondition.new("match_substring",
                                    @column,
                                    pattern,
                                    ignore_case)
end
|
194
|
+
|
195
|
+
# Builds a condition that runs the "starts_with" function on this
# column with `substring` as the pattern.
#
# @param substring [String] The pattern given to the function.
# @param ignore_case [Boolean] Passed to the match options as is.
def start_with?(substring, ignore_case: false)
  function_name = "starts_with"
  MatchSubstringFamilyCondition.new(function_name,
                                    @column,
                                    substring,
                                    ignore_case)
end
|
165
199
|
end
|
166
200
|
|
167
201
|
class NotColumnCondition < Condition
|
@@ -351,5 +385,32 @@ module Arrow
|
|
351
385
|
BooleanArray.new(raw_array)
|
352
386
|
end
|
353
387
|
end
|
388
|
+
|
389
|
+
# Condition that runs one of the substring matching functions
# (selected by name) on a column, optionally inverting the result.
class MatchSubstringFamilyCondition < Condition
  # @param function [String] The name of the function to execute.
  # @param column [Object] The target column; `column.data` is what
  #   the function receives.
  # @param pattern [String] The pattern set on the match options.
  # @param ignore_case [Boolean] The flag set on the match options.
  # @param invert [Boolean] Whether to invert the function result.
  def initialize(function, column, pattern, ignore_case, invert: false)
    @function = function
    @column = column
    @invert = invert
    @options = MatchSubstringOptions.new
    @options.pattern = pattern
    @options.ignore_case = ignore_case
  end

  # @return [MatchSubstringFamilyCondition] A new condition with the
  #   same function, column and options but the opposite `invert`.
  def !@
    flipped = !@invert
    MatchSubstringFamilyCondition.new(@function,
                                      @column,
                                      @options.pattern,
                                      @options.ignore_case?,
                                      invert: flipped)
  end

  # Executes the function on the column data and, when inverted,
  # feeds the result to the "invert" function.
  #
  # @return [Object] The `value` of the resulting datum.
  def evaluate
    result = Function.find(@function).execute([@column.data], @options)
    result = Function.find("invert").execute([result]) if @invert
    result.value
  end
end
|
354
415
|
end
|
355
416
|
end
|
data/lib/arrow/sort-key.rb
CHANGED
@@ -79,9 +79,9 @@ module Arrow
|
|
79
79
|
# target and corresponding order is used. `"+"` uses ascending
|
80
80
|
# order and `"-"` uses descending order.
|
81
81
|
#
|
82
|
-
# If `target` is not a String
|
83
|
-
# leading order mark, sort column
|
84
|
-
# ascending order is used.
|
82
|
+
# If `target` is either not a String or `target` doesn't start
|
83
|
+
# with the leading order mark, sort column is `target` as-is
|
84
|
+
# and ascending order is used.
|
85
85
|
#
|
86
86
|
# @example String without the leading order mark
|
87
87
|
# key = Arrow::SortKey.new("count")
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class SparseUnionArrayBuilder
    alias_method :append_value_raw, :append_value

    # Appends a union record.
    #
    # @param value [nil, Hash] The union record value.
    #
    #   If this is `nil`, a null is appended.
    #
    #   If this is `Hash`, its first key selects the child and its
    #   first value is appended to that child's builder. A null is
    #   appended to the builder of every other child.
    #
    # @since 12.0.0
    def append_value(value)
      return append_null if value.nil?

      selected_key = value.keys[0]
      append_value_raw(child_infos[selected_key][:id])
      child_infos.each do |name, info|
        child_builder = info[:builder]
        if name == selected_key
          child_builder.append(value.values[0])
        else
          child_builder.append_null
        end
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class SparseUnionArray
    # Returns the value stored at the i-th slot of this sparse union.
    #
    # The child ID recorded for slot `i` selects the child field, and
    # the same index `i` is used inside that child field.
    #
    # @param i [Integer] The index of the target slot.
    #
    # @return [Object] The element of the selected child field.
    def get_value(i)
      get_field(get_child_id(i))[i]
    end
  end
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -161,7 +161,7 @@ module Arrow
|
|
161
161
|
record_batches << record_batch
|
162
162
|
end
|
163
163
|
table = Table.new(schema, record_batches)
|
164
|
-
table.
|
164
|
+
table.refer_input(input)
|
165
165
|
table
|
166
166
|
end
|
167
167
|
|
@@ -211,7 +211,7 @@ module Arrow
|
|
211
211
|
field_indexes = @options[:field_indexes]
|
212
212
|
reader.set_field_indexes(field_indexes) if field_indexes
|
213
213
|
table = reader.read_stripes
|
214
|
-
table.
|
214
|
+
table.refer_input(input)
|
215
215
|
table
|
216
216
|
end
|
217
217
|
end
|
@@ -245,16 +245,22 @@ module Arrow
|
|
245
245
|
open_input_stream do |input|
|
246
246
|
reader = FeatherFileReader.new(input)
|
247
247
|
table = reader.read
|
248
|
-
table.
|
248
|
+
table.refer_input(input)
|
249
249
|
table
|
250
250
|
end
|
251
251
|
end
|
252
252
|
|
253
253
|
def load_as_json
|
254
254
|
open_input_stream do |input|
|
255
|
-
|
255
|
+
options = JSONReadOptions.new
|
256
|
+
@options.each do |key, value|
|
257
|
+
next if value.nil?
|
258
|
+
setter = :"#{key}="
|
259
|
+
options.__send__(setter, value) if options.respond_to?(setter)
|
260
|
+
end
|
261
|
+
reader = JSONReader.new(input, options)
|
256
262
|
table = reader.read
|
257
|
-
table.
|
263
|
+
table.refer_input(input)
|
258
264
|
table
|
259
265
|
end
|
260
266
|
end
|