red-arrow 10.0.1 → 12.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/converters.hpp +45 -41
  4. data/ext/arrow/extconf.rb +14 -2
  5. data/ext/arrow/raw-records.cpp +1 -2
  6. data/ext/arrow/values.cpp +1 -2
  7. data/lib/arrow/array-computable.rb +13 -0
  8. data/lib/arrow/array.rb +5 -0
  9. data/lib/arrow/chunked-array.rb +23 -1
  10. data/lib/arrow/column-containable.rb +9 -0
  11. data/lib/arrow/column.rb +1 -0
  12. data/lib/arrow/data-type.rb +9 -0
  13. data/lib/arrow/dense-union-array-builder.rb +49 -0
  14. data/lib/arrow/dense-union-array.rb +26 -0
  15. data/lib/arrow/half-float-array-builder.rb +32 -0
  16. data/lib/arrow/half-float-array.rb +24 -0
  17. data/lib/arrow/half-float.rb +118 -0
  18. data/lib/arrow/input-referable.rb +29 -0
  19. data/lib/arrow/loader.rb +10 -0
  20. data/lib/arrow/raw-table-converter.rb +7 -5
  21. data/lib/arrow/record-batch-file-reader.rb +2 -0
  22. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  23. data/lib/arrow/record-batch.rb +6 -2
  24. data/lib/arrow/scalar.rb +67 -0
  25. data/lib/arrow/slicer.rb +61 -0
  26. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  27. data/lib/arrow/sparse-union-array.rb +26 -0
  28. data/lib/arrow/struct-array-builder.rb +0 -5
  29. data/lib/arrow/table-loader.rb +4 -4
  30. data/lib/arrow/table-saver.rb +1 -0
  31. data/lib/arrow/table.rb +178 -31
  32. data/lib/arrow/tensor.rb +4 -0
  33. data/lib/arrow/union-array-builder.rb +59 -0
  34. data/lib/arrow/version.rb +1 -1
  35. data/red-arrow.gemspec +1 -1
  36. data/test/raw-records/test-basic-arrays.rb +10 -0
  37. data/test/raw-records/test-dense-union-array.rb +90 -45
  38. data/test/raw-records/test-list-array.rb +28 -10
  39. data/test/raw-records/test-map-array.rb +39 -10
  40. data/test/raw-records/test-sparse-union-array.rb +86 -41
  41. data/test/raw-records/test-struct-array.rb +22 -8
  42. data/test/test-array.rb +7 -0
  43. data/test/test-chunked-array.rb +9 -0
  44. data/test/test-data-type.rb +2 -1
  45. data/test/test-dense-union-array.rb +42 -0
  46. data/test/test-dense-union-data-type.rb +1 -1
  47. data/test/test-function.rb +7 -7
  48. data/test/test-group.rb +58 -58
  49. data/test/test-half-float-array.rb +43 -0
  50. data/test/test-half-float.rb +130 -0
  51. data/test/test-record-batch-file-reader.rb +21 -0
  52. data/test/test-record-batch-stream-reader.rb +129 -0
  53. data/test/test-scalar.rb +65 -0
  54. data/test/test-slicer.rb +194 -129
  55. data/test/test-sparse-union-array.rb +38 -0
  56. data/test/test-table.rb +324 -40
  57. data/test/values/test-basic-arrays.rb +10 -0
  58. data/test/values/test-dense-union-array.rb +88 -45
  59. data/test/values/test-list-array.rb +26 -10
  60. data/test/values/test-map-array.rb +33 -10
  61. data/test/values/test-sparse-union-array.rb +84 -41
  62. data/test/values/test-struct-array.rb +20 -8
  63. metadata +30 -9
data/lib/arrow/loader.rb CHANGED
@@ -39,6 +39,7 @@ module Arrow
39
39
  require "arrow/field-containable"
40
40
  require "arrow/generic-filterable"
41
41
  require "arrow/generic-takeable"
42
+ require "arrow/input-referable"
42
43
  require "arrow/record-containable"
43
44
  require "arrow/symbol-values-appendable"
44
45
 
@@ -69,6 +70,8 @@ module Arrow
69
70
  require "arrow/decimal256-array"
70
71
  require "arrow/decimal256-array-builder"
71
72
  require "arrow/decimal256-data-type"
73
+ require "arrow/dense-union-array"
74
+ require "arrow/dense-union-array-builder"
72
75
  require "arrow/dense-union-data-type"
73
76
  require "arrow/dictionary-array"
74
77
  require "arrow/dictionary-data-type"
@@ -81,6 +84,9 @@ module Arrow
81
84
  require "arrow/fixed-size-binary-array-builder"
82
85
  require "arrow/function"
83
86
  require "arrow/group"
87
+ require "arrow/half-float"
88
+ require "arrow/half-float-array"
89
+ require "arrow/half-float-array-builder"
84
90
  require "arrow/list-array-builder"
85
91
  require "arrow/list-data-type"
86
92
  require "arrow/map-array"
@@ -105,6 +111,8 @@ module Arrow
105
111
  require "arrow/sort-key"
106
112
  require "arrow/sort-options"
107
113
  require "arrow/source-node-options"
114
+ require "arrow/sparse-union-array"
115
+ require "arrow/sparse-union-array-builder"
108
116
  require "arrow/sparse-union-data-type"
109
117
  require "arrow/string-dictionary-array-builder"
110
118
  require "arrow/string-array-builder"
@@ -130,6 +138,7 @@ module Arrow
130
138
  require "arrow/timestamp-array"
131
139
  require "arrow/timestamp-array-builder"
132
140
  require "arrow/timestamp-data-type"
141
+ require "arrow/union-array-builder"
133
142
  require "arrow/writable"
134
143
  end
135
144
 
@@ -196,6 +205,7 @@ module Arrow
196
205
  "Arrow::Date64Array",
197
206
  "Arrow::Decimal128Array",
198
207
  "Arrow::Decimal256Array",
208
+ "Arrow::HalfFloatArray",
199
209
  "Arrow::Time32Array",
200
210
  "Arrow::Time64Array",
201
211
  "Arrow::TimestampArray"
data/lib/arrow/raw-table-converter.rb CHANGED
@@ -35,14 +35,16 @@ module Arrow
35
35
  fields = []
36
36
  @values = []
37
37
  @raw_table.each do |name, array|
38
- if array.respond_to?(:to_arrow_array)
39
- array = array.to_arrow_array
38
+ if array.respond_to?(:to_arrow_chunked_array)
39
+ chunked_array = array.to_arrow_chunked_array
40
+ elsif array.respond_to?(:to_arrow_array)
41
+ chunked_array = ChunkedArray.new([array.to_arrow_array])
40
42
  else
41
43
  array = array.to_ary if array.respond_to?(:to_ary)
42
- array = ArrayBuilder.build(array)
44
+ chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
43
45
  end
44
- fields << Field.new(name.to_s, array.value_data_type)
45
- @values << array
46
+ fields << Field.new(name.to_s, chunked_array.value_data_type)
47
+ @values << chunked_array
46
48
  end
47
49
  @schema = Schema.new(fields)
48
50
  end
data/lib/arrow/record-batch-file-reader.rb CHANGED
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) {n_record_batches} unless block_given?
24
+
23
25
  n_record_batches.times do |i|
24
26
  yield(get_record_batch(i))
25
27
  end
data/lib/arrow/record-batch-stream-reader.rb CHANGED
@@ -20,6 +20,8 @@ module Arrow
20
20
  include Enumerable
21
21
 
22
22
  def each
23
+ return to_enum(__method__) unless block_given?
24
+
23
25
  loop do
24
26
  record_batch = next_record_batch
25
27
  break if record_batch.nil?
data/lib/arrow/record-batch.rb CHANGED
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
19
19
 
20
20
  module Arrow
21
21
  class RecordBatch
22
+ include Enumerable
23
+
22
24
  include ColumnContainable
25
+ include InputReferable
23
26
  include RecordContainable
24
- include Enumerable
25
27
 
26
28
  class << self
27
29
  def new(*args)
@@ -56,7 +58,9 @@ module Arrow
56
58
  #
57
59
  # @since 0.12.0
58
60
  def to_table
59
- Table.new(schema, [self])
61
+ table = Table.new(schema, [self])
62
+ share_input(table)
63
+ table
60
64
  end
61
65
 
62
66
  def respond_to_missing?(name, include_private)
data/lib/arrow/scalar.rb CHANGED
@@ -17,6 +17,73 @@
17
17
 
18
18
  module Arrow
19
19
  class Scalar
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when self
25
+ value
26
+ when true, false
27
+ BooleanScalar.new(value)
28
+ when Symbol, String
29
+ StringScalar.new(value.to_s)
30
+ when Integer
31
+ Int64Scalar.new(value)
32
+ when Float
33
+ DoubleScalar.new(value)
34
+ else
35
+ nil
36
+ end
37
+ end
38
+
39
+ # Ensures that a suitable {Arrow::Scalar} is returned.
40
+ #
41
+ # @overload resolve(scalar)
42
+ #
43
+ # Returns the given scalar itself. This makes it convenient to
44
+ # use this method as an {Arrow::Scalar} converter.
45
+ #
46
+ # @param scalar [Arrow::Scalar] The scalar.
47
+ #
48
+ # @return [Arrow::Scalar] The given scalar itself.
49
+ #
50
+ # @overload resolve(value)
51
+ #
52
+ # Creates a suitable scalar from the given value. For example,
53
+ # you can create {Arrow::BooleanScalar} from `true`.
54
+ #
55
+ # @param value [Object] The value.
56
+ #
57
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
58
+ #
59
+ # @overload resolve(value, data_type)
60
+ #
61
+ # Creates a scalar of `data_type.scalar_class` from the given
62
+ # value. For example, you can create {Arrow::Int32Scalar} from
63
+ # `29` and {Arrow::Int32DataType}.
64
+ #
65
+ # @param value [Object] The value.
66
+ #
67
+ # @param data_type [Arrow::DataType] The {Arrow::DataType} to
68
+ # decide the returned scalar class.
69
+ #
70
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
71
+ #
72
+ # @since 12.0.0
73
+ def resolve(value, data_type=nil)
74
+ return try_convert(value) if data_type.nil?
75
+
76
+ data_type = DataType.resolve(data_type)
77
+ scalar_class = data_type.scalar_class
78
+ case value
79
+ when Scalar
80
+ return value if value.class == scalar_class
81
+ value = value.value
82
+ end
83
+ scalar_class.new(value)
84
+ end
85
+ end
86
+
20
87
  # @param other [Arrow::Scalar] The scalar to be compared.
21
88
  # @param options [Arrow::EqualOptions, Hash] (nil)
22
89
  # The options to customize how to compare.
data/lib/arrow/slicer.rb CHANGED
@@ -162,6 +162,40 @@ module Arrow
162
162
  def reject(&block)
163
163
  RejectCondition.new(@column, block)
164
164
  end
165
+
166
+ def end_with?(substring, ignore_case: false)
167
+ MatchSubstringFamilyCondition.new("ends_with",
168
+ @column, substring, ignore_case)
169
+ end
170
+
171
+ def match_like?(pattern, ignore_case: false)
172
+ MatchSubstringFamilyCondition.new("match_like",
173
+ @column, pattern, ignore_case)
174
+ end
175
+
176
+ def match_substring?(pattern, ignore_case: nil)
177
+ case pattern
178
+ when String
179
+ ignore_case = false if ignore_case.nil?
180
+ MatchSubstringFamilyCondition.new("match_substring",
181
+ @column, pattern, ignore_case)
182
+ when Regexp
183
+ ignore_case = pattern.casefold? if ignore_case.nil?
184
+ MatchSubstringFamilyCondition.new("match_substring_regex",
185
+ @column,
186
+ pattern.source,
187
+ ignore_case)
188
+ else
189
+ message =
190
+ "pattern must be either String or Regexp: #{pattern.inspect}"
191
+ raise ArgumentError, message
192
+ end
193
+ end
194
+
195
+ def start_with?(substring, ignore_case: false)
196
+ MatchSubstringFamilyCondition.new("starts_with",
197
+ @column, substring, ignore_case)
198
+ end
165
199
  end
166
200
 
167
201
  class NotColumnCondition < Condition
@@ -351,5 +385,32 @@ module Arrow
351
385
  BooleanArray.new(raw_array)
352
386
  end
353
387
  end
388
+
389
+ class MatchSubstringFamilyCondition < Condition
390
+ def initialize(function, column, pattern, ignore_case, invert: false)
391
+ @function = function
392
+ @column = column
393
+ @options = MatchSubstringOptions.new
394
+ @options.pattern = pattern
395
+ @options.ignore_case = ignore_case
396
+ @invert = invert
397
+ end
398
+
399
+ def !@
400
+ MatchSubstringFamilyCondition.new(@function,
401
+ @column,
402
+ @options.pattern,
403
+ @options.ignore_case?,
404
+ invert: !@invert)
405
+ end
406
+
407
+ def evaluate
408
+ datum = Function.find(@function).execute([@column.data], @options)
409
+ if @invert
410
+ datum = Function.find("invert").execute([datum])
411
+ end
412
+ datum.value
413
+ end
414
+ end
354
415
  end
355
416
  end
data/lib/arrow/sparse-union-array-builder.rb ADDED
@@ -0,0 +1,56 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArrayBuilder
20
+ alias_method :append_value_raw, :append_value
21
+
22
+ # @overload append_value
23
+ #
24
+ # Starts appending a union record. You need to append values of
25
+ # fields.
26
+ #
27
+ # @overload append_value(value)
28
+ #
29
+ # Appends a union record including values of fields.
30
+ #
31
+ # @param value [nil, Hash] The union record value.
32
+ #
33
+ # If this is `nil`, the union record is null.
34
+ #
35
+ # If this is `Hash`, it's values of fields.
36
+ #
37
+ # @since 12.0.0
38
+ def append_value(value)
39
+ if value.nil?
40
+ append_null
41
+ else
42
+ key = value.keys[0]
43
+ child_info = child_infos[key]
44
+ append_value_raw(child_info[:id])
45
+ child_infos.each do |child_key, child_info|
46
+ builder = child_info[:builder]
47
+ if child_key == key
48
+ builder.append(value.values[0])
49
+ else
50
+ builder.append_null
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
data/lib/arrow/sparse-union-array.rb ADDED
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SparseUnionArray
20
+ def get_value(i)
21
+ child_id = get_child_id(i)
22
+ field = get_field(child_id)
23
+ field[i]
24
+ end
25
+ end
26
+ end
data/lib/arrow/struct-array-builder.rb CHANGED
@@ -110,11 +110,6 @@ module Arrow
110
110
  end
111
111
  end
112
112
 
113
- alias_method :append_null_raw, :append_null
114
- def append_null
115
- append_null_raw
116
- end
117
-
118
113
  # @since 0.12.0
119
114
  def append(*values)
120
115
  if values.empty?
data/lib/arrow/table-loader.rb CHANGED
@@ -161,7 +161,7 @@ module Arrow
161
161
  record_batches << record_batch
162
162
  end
163
163
  table = Table.new(schema, record_batches)
164
- table.instance_variable_set(:@input, input)
164
+ table.refer_input(input)
165
165
  table
166
166
  end
167
167
 
@@ -211,7 +211,7 @@ module Arrow
211
211
  field_indexes = @options[:field_indexes]
212
212
  reader.set_field_indexes(field_indexes) if field_indexes
213
213
  table = reader.read_stripes
214
- table.instance_variable_set(:@input, input)
214
+ table.refer_input(input)
215
215
  table
216
216
  end
217
217
  end
@@ -245,7 +245,7 @@ module Arrow
245
245
  open_input_stream do |input|
246
246
  reader = FeatherFileReader.new(input)
247
247
  table = reader.read
248
- table.instance_variable_set(:@input, input)
248
+ table.refer_input(input)
249
249
  table
250
250
  end
251
251
  end
@@ -254,7 +254,7 @@ module Arrow
254
254
  open_input_stream do |input|
255
255
  reader = JSONReader.new(input)
256
256
  table = reader.read
257
- table.instance_variable_set(:@input, input)
257
+ table.refer_input(input)
258
258
  table
259
259
  end
260
260
  end
data/lib/arrow/table-saver.rb CHANGED
@@ -51,6 +51,7 @@ module Arrow
51
51
  raise ArgumentError, message
52
52
  end
53
53
  __send__(custom_save_method)
54
+ @table
54
55
  end
55
56
 
56
57
  private