red-arrow 10.0.1 → 12.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +45 -41
  4. data/ext/arrow/extconf.rb +14 -2
  5. data/ext/arrow/raw-records.cpp +1 -2
  6. data/ext/arrow/values.cpp +1 -2
  7. data/lib/arrow/array-computable.rb +13 -0
  8. data/lib/arrow/array.rb +5 -0
  9. data/lib/arrow/chunked-array.rb +23 -1
  10. data/lib/arrow/column-containable.rb +9 -0
  11. data/lib/arrow/column.rb +1 -0
  12. data/lib/arrow/data-type.rb +9 -0
  13. data/lib/arrow/dense-union-array-builder.rb +49 -0
  14. data/lib/arrow/dense-union-array.rb +26 -0
  15. data/lib/arrow/half-float-array-builder.rb +32 -0
  16. data/lib/arrow/half-float-array.rb +24 -0
  17. data/lib/arrow/half-float.rb +118 -0
  18. data/lib/arrow/input-referable.rb +29 -0
  19. data/lib/arrow/loader.rb +10 -0
  20. data/lib/arrow/raw-table-converter.rb +7 -5
  21. data/lib/arrow/record-batch-file-reader.rb +2 -0
  22. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  23. data/lib/arrow/record-batch.rb +6 -2
  24. data/lib/arrow/scalar.rb +67 -0
  25. data/lib/arrow/slicer.rb +61 -0
  26. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  27. data/lib/arrow/sparse-union-array.rb +26 -0
  28. data/lib/arrow/struct-array-builder.rb +0 -5
  29. data/lib/arrow/table-loader.rb +4 -4
  30. data/lib/arrow/table-saver.rb +1 -0
  31. data/lib/arrow/table.rb +178 -31
  32. data/lib/arrow/tensor.rb +4 -0
  33. data/lib/arrow/union-array-builder.rb +59 -0
  34. data/lib/arrow/version.rb +1 -1
  35. data/red-arrow.gemspec +1 -1
  36. data/test/raw-records/test-basic-arrays.rb +10 -0
  37. data/test/raw-records/test-dense-union-array.rb +90 -45
  38. data/test/raw-records/test-list-array.rb +28 -10
  39. data/test/raw-records/test-map-array.rb +39 -10
  40. data/test/raw-records/test-sparse-union-array.rb +86 -41
  41. data/test/raw-records/test-struct-array.rb +22 -8
  42. data/test/test-array.rb +7 -0
  43. data/test/test-chunked-array.rb +9 -0
  44. data/test/test-data-type.rb +2 -1
  45. data/test/test-dense-union-array.rb +42 -0
  46. data/test/test-dense-union-data-type.rb +1 -1
  47. data/test/test-function.rb +7 -7
  48. data/test/test-group.rb +58 -58
  49. data/test/test-half-float-array.rb +43 -0
  50. data/test/test-half-float.rb +130 -0
  51. data/test/test-record-batch-file-reader.rb +21 -0
  52. data/test/test-record-batch-stream-reader.rb +129 -0
  53. data/test/test-scalar.rb +65 -0
  54. data/test/test-slicer.rb +194 -129
  55. data/test/test-sparse-union-array.rb +38 -0
  56. data/test/test-table.rb +324 -40
  57. data/test/values/test-basic-arrays.rb +10 -0
  58. data/test/values/test-dense-union-array.rb +88 -45
  59. data/test/values/test-list-array.rb +26 -10
  60. data/test/values/test-map-array.rb +33 -10
  61. data/test/values/test-sparse-union-array.rb +84 -41
  62. data/test/values/test-struct-array.rb +20 -8
  63. metadata +30 -9
data/lib/arrow/loader.rb CHANGED
@@ -39,6 +39,7 @@ module Arrow
39
39
  require "arrow/field-containable"
40
40
  require "arrow/generic-filterable"
41
41
  require "arrow/generic-takeable"
42
+ require "arrow/input-referable"
42
43
  require "arrow/record-containable"
43
44
  require "arrow/symbol-values-appendable"
44
45
 
@@ -69,6 +70,8 @@ module Arrow
69
70
  require "arrow/decimal256-array"
70
71
  require "arrow/decimal256-array-builder"
71
72
  require "arrow/decimal256-data-type"
73
+ require "arrow/dense-union-array"
74
+ require "arrow/dense-union-array-builder"
72
75
  require "arrow/dense-union-data-type"
73
76
  require "arrow/dictionary-array"
74
77
  require "arrow/dictionary-data-type"
@@ -81,6 +84,9 @@ module Arrow
81
84
  require "arrow/fixed-size-binary-array-builder"
82
85
  require "arrow/function"
83
86
  require "arrow/group"
87
+ require "arrow/half-float"
88
+ require "arrow/half-float-array"
89
+ require "arrow/half-float-array-builder"
84
90
  require "arrow/list-array-builder"
85
91
  require "arrow/list-data-type"
86
92
  require "arrow/map-array"
@@ -105,6 +111,8 @@ module Arrow
105
111
  require "arrow/sort-key"
106
112
  require "arrow/sort-options"
107
113
  require "arrow/source-node-options"
114
+ require "arrow/sparse-union-array"
115
+ require "arrow/sparse-union-array-builder"
108
116
  require "arrow/sparse-union-data-type"
109
117
  require "arrow/string-dictionary-array-builder"
110
118
  require "arrow/string-array-builder"
@@ -130,6 +138,7 @@ module Arrow
130
138
  require "arrow/timestamp-array"
131
139
  require "arrow/timestamp-array-builder"
132
140
  require "arrow/timestamp-data-type"
141
+ require "arrow/union-array-builder"
133
142
  require "arrow/writable"
134
143
  end
135
144
 
@@ -196,6 +205,7 @@ module Arrow
196
205
  "Arrow::Date64Array",
197
206
  "Arrow::Decimal128Array",
198
207
  "Arrow::Decimal256Array",
208
+ "Arrow::HalfFloatArray",
199
209
  "Arrow::Time32Array",
200
210
  "Arrow::Time64Array",
201
211
  "Arrow::TimestampArray"
@@ -35,14 +35,16 @@ module Arrow
35
35
  fields = []
36
36
  @values = []
37
37
  @raw_table.each do |name, array|
38
- if array.respond_to?(:to_arrow_array)
39
- array = array.to_arrow_array
38
+ if array.respond_to?(:to_arrow_chunked_array)
39
+ chunked_array = array.to_arrow_chunked_array
40
+ elsif array.respond_to?(:to_arrow_array)
41
+ chunked_array = ChunkedArray.new([array.to_arrow_array])
40
42
  else
41
43
  array = array.to_ary if array.respond_to?(:to_ary)
42
- array = ArrayBuilder.build(array)
44
+ chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
43
45
  end
44
- fields << Field.new(name.to_s, array.value_data_type)
45
- @values << array
46
+ fields << Field.new(name.to_s, chunked_array.value_data_type)
47
+ @values << chunked_array
46
48
  end
47
49
  @schema = Schema.new(fields)
48
50
  end
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) {n_record_batches} unless block_given?
24
+
23
25
  n_record_batches.times do |i|
24
26
  yield(get_record_batch(i))
25
27
  end
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) unless block_given?
24
+
23
25
  loop do
24
26
  record_batch = next_record_batch
25
27
  break if record_batch.nil?
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
19
19
 
20
20
  module Arrow
21
21
  class RecordBatch
22
+ include Enumerable
23
+
22
24
  include ColumnContainable
25
+ include InputReferable
23
26
  include RecordContainable
24
- include Enumerable
25
27
 
26
28
  class << self
27
29
  def new(*args)
@@ -56,7 +58,9 @@ module Arrow
56
58
  #
57
59
  # @since 0.12.0
58
60
  def to_table
59
- Table.new(schema, [self])
61
+ table = Table.new(schema, [self])
62
+ share_input(table)
63
+ table
60
64
  end
61
65
 
62
66
  def respond_to_missing?(name, include_private)
data/lib/arrow/scalar.rb CHANGED
@@ -17,6 +17,73 @@
17
17
 
18
18
  module Arrow
19
19
  class Scalar
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when self
25
+ value
26
+ when true, false
27
+ BooleanScalar.new(value)
28
+ when Symbol, String
29
+ StringScalar.new(value.to_s)
30
+ when Integer
31
+ Int64Scalar.new(value)
32
+ when Float
33
+ DoubleScalar.new(value)
34
+ else
35
+ nil
36
+ end
37
+ end
38
+
39
+ # Ensure returning suitable {Arrow::Scalar}.
40
+ #
41
+ # @overload resolve(scalar)
42
+ #
43
+ # Returns the given scalar itself. This makes it convenient to
44
+ # use this method as an {Arrow::Scalar} converter.
45
+ #
46
+ # @param scalar [Arrow::Scalar] The scalar.
47
+ #
48
+ # @return [Arrow::Scalar] The given scalar itself.
49
+ #
50
+ # @overload resolve(value)
51
+ #
52
+ # Creates a suitable scalar from the given value. For example,
53
+ # you can create {Arrow::BooleanScalar} from `true`.
54
+ #
55
+ # @param value [Object] The value.
56
+ #
57
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
58
+ #
59
+ # @overload resolve(value, data_type)
60
+ #
61
+ # Creates a scalar of `data_type.scalar_class` from the given
62
+ # value. For example, you can create {Arrow::Int32Scalar} from
63
+ # `29` and {Arrow::Int32DataType}.
64
+ #
65
+ # @param value [Object] The value.
66
+ #
67
+ # @param data_type [Arrow::DataType] The {Arrow::DataType} to
68
+ # decide the returned scalar class.
69
+ #
70
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
71
+ #
72
+ # @since 12.0.0
73
+ def resolve(value, data_type=nil)
74
+ return try_convert(value) if data_type.nil?
75
+
76
+ data_type = DataType.resolve(data_type)
77
+ scalar_class = data_type.scalar_class
78
+ case value
79
+ when Scalar
80
+ return value if value.class == scalar_class
81
+ value = value.value
82
+ end
83
+ scalar_class.new(value)
84
+ end
85
+ end
86
+
20
87
  # @param other [Arrow::Scalar] The scalar to be compared.
21
88
  # @param options [Arrow::EqualOptions, Hash] (nil)
22
89
  # The options to customize how to compare.
data/lib/arrow/slicer.rb CHANGED
@@ -162,6 +162,40 @@ module Arrow
162
162
  def reject(&block)
163
163
  RejectCondition.new(@column, block)
164
164
  end
165
+
166
+ def end_with?(substring, ignore_case: false)
167
+ MatchSubstringFamilyCondition.new("ends_with",
168
+ @column, substring, ignore_case)
169
+ end
170
+
171
+ def match_like?(pattern, ignore_case: false)
172
+ MatchSubstringFamilyCondition.new("match_like",
173
+ @column, pattern, ignore_case)
174
+ end
175
+
176
+ def match_substring?(pattern, ignore_case: nil)
177
+ case pattern
178
+ when String
179
+ ignore_case = false if ignore_case.nil?
180
+ MatchSubstringFamilyCondition.new("match_substring",
181
+ @column, pattern, ignore_case)
182
+ when Regexp
183
+ ignore_case = pattern.casefold? if ignore_case.nil?
184
+ MatchSubstringFamilyCondition.new("match_substring_regex",
185
+ @column,
186
+ pattern.source,
187
+ ignore_case)
188
+ else
189
+ message =
190
+ "pattern must be either String or Regexp: #{pattern.inspect}"
191
+ raise ArgumentError, message
192
+ end
193
+ end
194
+
195
+ def start_with?(substring, ignore_case: false)
196
+ MatchSubstringFamilyCondition.new("starts_with",
197
+ @column, substring, ignore_case)
198
+ end
165
199
  end
166
200
 
167
201
  class NotColumnCondition < Condition
@@ -351,5 +385,32 @@ module Arrow
351
385
  BooleanArray.new(raw_array)
352
386
  end
353
387
  end
388
+
389
+ class MatchSubstringFamilyCondition < Condition
390
+ def initialize(function, column, pattern, ignore_case, invert: false)
391
+ @function = function
392
+ @column = column
393
+ @options = MatchSubstringOptions.new
394
+ @options.pattern = pattern
395
+ @options.ignore_case = ignore_case
396
+ @invert = invert
397
+ end
398
+
399
+ def !@
400
+ MatchSubstringFamilyCondition.new(@function,
401
+ @column,
402
+ @options.pattern,
403
+ @options.ignore_case?,
404
+ invert: !@invert)
405
+ end
406
+
407
+ def evaluate
408
+ datum = Function.find(@function).execute([@column.data], @options)
409
+ if @invert
410
+ datum = Function.find("invert").execute([datum])
411
+ end
412
+ datum.value
413
+ end
414
+ end
354
415
  end
355
416
  end
@@ -0,0 +1,56 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArrayBuilder
20
+ alias_method :append_value_raw, :append_value
21
+
22
+ # @overload append_value
23
+ #
24
+ # Starts appending a union record. You need to append values of
25
+ # fields.
26
+ #
27
+ # @overload append_value(value)
28
+ #
29
+ # Appends a union record including values of fields.
30
+ #
31
+ # @param value [nil, Hash] The union record value.
32
+ #
33
+ # If this is `nil`, the union record is null.
34
+ #
35
+ # If this is `Hash`, its first key selects the child and its first value is appended to that child.
36
+ #
37
+ # @since 12.0.0
38
+ def append_value(value)
39
+ if value.nil?
40
+ append_null
41
+ else
42
+ key = value.keys[0]
43
+ child_info = child_infos[key]
44
+ append_value_raw(child_info[:id])
45
+ child_infos.each do |child_key, child_info|
46
+ builder = child_info[:builder]
47
+ if child_key == key
48
+ builder.append(value.values[0])
49
+ else
50
+ builder.append_null
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArray
20
+ def get_value(i)
21
+ child_id = get_child_id(i)
22
+ field = get_field(child_id)
23
+ field[i]
24
+ end
25
+ end
26
+ end
@@ -110,11 +110,6 @@ module Arrow
110
110
  end
111
111
  end
112
112
 
113
- alias_method :append_null_raw, :append_null
114
- def append_null
115
- append_null_raw
116
- end
117
-
118
113
  # @since 0.12.0
119
114
  def append(*values)
120
115
  if values.empty?
@@ -161,7 +161,7 @@ module Arrow
161
161
  record_batches << record_batch
162
162
  end
163
163
  table = Table.new(schema, record_batches)
164
- table.instance_variable_set(:@input, input)
164
+ table.refer_input(input)
165
165
  table
166
166
  end
167
167
 
@@ -211,7 +211,7 @@ module Arrow
211
211
  field_indexes = @options[:field_indexes]
212
212
  reader.set_field_indexes(field_indexes) if field_indexes
213
213
  table = reader.read_stripes
214
- table.instance_variable_set(:@input, input)
214
+ table.refer_input(input)
215
215
  table
216
216
  end
217
217
  end
@@ -245,7 +245,7 @@ module Arrow
245
245
  open_input_stream do |input|
246
246
  reader = FeatherFileReader.new(input)
247
247
  table = reader.read
248
- table.instance_variable_set(:@input, input)
248
+ table.refer_input(input)
249
249
  table
250
250
  end
251
251
  end
@@ -254,7 +254,7 @@ module Arrow
254
254
  open_input_stream do |input|
255
255
  reader = JSONReader.new(input)
256
256
  table = reader.read
257
- table.instance_variable_set(:@input, input)
257
+ table.refer_input(input)
258
258
  table
259
259
  end
260
260
  end
@@ -51,6 +51,7 @@ module Arrow
51
51
  raise ArgumentError, message
52
52
  end
53
53
  __send__(custom_save_method)
54
+ @table
54
55
  end
55
56
 
56
57
  private