red-arrow 0.17.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/buffer.rb +28 -0
  10. data/lib/arrow/data-type.rb +14 -5
  11. data/lib/arrow/decimal128-array-builder.rb +21 -25
  12. data/lib/arrow/decimal128-data-type.rb +2 -0
  13. data/lib/arrow/decimal128.rb +18 -0
  14. data/lib/arrow/decimal256-array-builder.rb +61 -0
  15. data/lib/arrow/decimal256-array.rb +25 -0
  16. data/lib/arrow/decimal256-data-type.rb +73 -0
  17. data/lib/arrow/decimal256.rb +60 -0
  18. data/lib/arrow/dense-union-data-type.rb +2 -2
  19. data/lib/arrow/dictionary-array.rb +24 -0
  20. data/lib/arrow/dictionary-data-type.rb +2 -2
  21. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  22. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  23. data/lib/arrow/loader.rb +18 -0
  24. data/lib/arrow/raw-table-converter.rb +47 -0
  25. data/lib/arrow/record-batch-iterator.rb +22 -0
  26. data/lib/arrow/record-batch.rb +9 -1
  27. data/lib/arrow/sort-key.rb +193 -0
  28. data/lib/arrow/sort-options.rb +109 -0
  29. data/lib/arrow/sparse-union-data-type.rb +2 -2
  30. data/lib/arrow/struct-array-builder.rb +13 -7
  31. data/lib/arrow/table-saver.rb +6 -6
  32. data/lib/arrow/table.rb +5 -24
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +1 -0
  38. data/test/raw-records/test-basic-arrays.rb +17 -0
  39. data/test/raw-records/test-dense-union-array.rb +15 -34
  40. data/test/raw-records/test-list-array.rb +20 -0
  41. data/test/raw-records/test-sparse-union-array.rb +15 -33
  42. data/test/raw-records/test-struct-array.rb +15 -0
  43. data/test/test-array.rb +122 -2
  44. data/test/test-bigdecimal.rb +20 -3
  45. data/test/test-buffer.rb +11 -0
  46. data/test/test-decimal128-array-builder.rb +18 -1
  47. data/test/test-decimal128-data-type.rb +2 -2
  48. data/test/test-decimal128.rb +38 -0
  49. data/test/test-decimal256-array-builder.rb +112 -0
  50. data/test/test-decimal256-array.rb +38 -0
  51. data/test/test-decimal256-data-type.rb +31 -0
  52. data/test/test-decimal256.rb +102 -0
  53. data/test/test-dense-union-data-type.rb +2 -2
  54. data/test/test-dictionary-array.rb +41 -0
  55. data/test/test-feather.rb +1 -1
  56. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  57. data/test/test-fixed-size-binary-array.rb +36 -0
  58. data/test/test-orc.rb +19 -23
  59. data/test/test-record-batch-iterator.rb +37 -0
  60. data/test/test-record-batch.rb +14 -0
  61. data/test/test-sort-indices.rb +40 -0
  62. data/test/test-sort-key.rb +81 -0
  63. data/test/test-sort-options.rb +58 -0
  64. data/test/test-sparse-union-data-type.rb +2 -2
  65. data/test/test-struct-array-builder.rb +16 -12
  66. data/test/test-struct-array.rb +2 -2
  67. data/test/values/test-basic-arrays.rb +11 -0
  68. data/test/values/test-dense-union-array.rb +15 -34
  69. data/test/values/test-list-array.rb +18 -0
  70. data/test/values/test-sparse-union-array.rb +15 -33
  71. data/test/values/test-struct-array.rb +15 -0
  72. metadata +107 -59
data/lib/arrow/sparse-union-data-type.rb CHANGED
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicates
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicates corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
data/lib/arrow/struct-array-builder.rb CHANGED
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end
data/lib/arrow/table-saver.rb CHANGED
@@ -155,13 +155,13 @@ module Arrow
155
155
  end
156
156
 
157
157
  def save_as_feather
158
+ properties = FeatherWriteProperties.new
159
+ properties.class.properties.each do |name|
160
+ value = @options[name.to_sym]
161
+ next if value.nil?
162
+ properties.__send__("#{name}=", value)
163
+ end
158
164
  open_raw_output_stream do |output|
159
- properties = FeatherWriteProperties.new
160
- properties.class.properties.each do |name|
161
- value = @options[name.to_sym]
162
- next if value.nil?
163
- properties.__send__("#{name}=", value)
164
- end
165
165
  @table.write_as_feather(output, properties)
166
166
  end
167
167
  end
data/lib/arrow/table.rb CHANGED
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class Table
20
22
  include ColumnContainable
@@ -81,14 +83,6 @@ module Arrow
81
83
  # `Array`.
82
84
  #
83
85
  # @example Create a table from column name and values
84
- # count_chunks = [
85
- # Arrow::UInt32Array.new([0, 2]),
86
- # Arrow::UInt32Array.new([nil, 4]),
87
- # ]
88
- # visible_chunks = [
89
- # Arrow::BooleanArray.new([true]),
90
- # Arrow::BooleanArray.new([nil, nil, false]),
91
- # ]
92
86
  # Arrow::Table.new("count" => [0, 2, nil, 4],
93
87
  # "visible" => [true, nil, nil, false])
94
88
  #
@@ -169,22 +163,9 @@ module Arrow
169
163
  n_args = args.size
170
164
  case n_args
171
165
  when 1
172
- if args[0][0].is_a?(Column)
173
- columns = args[0]
174
- fields = columns.collect(&:field)
175
- values = columns.collect(&:data)
176
- schema = Schema.new(fields)
177
- else
178
- raw_table = args[0]
179
- fields = []
180
- values = []
181
- raw_table.each do |name, array|
182
- array = ArrayBuilder.build(array) if array.is_a?(::Array)
183
- fields << Field.new(name.to_s, array.value_data_type)
184
- values << array
185
- end
186
- schema = Schema.new(fields)
187
- end
166
+ raw_table_converter = RawTableConverter.new(args[0])
167
+ schema = raw_table_converter.schema
168
+ values = raw_table_converter.values
188
169
  when 2
189
170
  schema = args[0]
190
171
  schema = Schema.new(schema) unless schema.is_a?(Schema)
data/lib/arrow/time32-data-type.rb CHANGED
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol
data/lib/arrow/time64-data-type.rb CHANGED
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be microsecond or nanosecond.
31
31
  #
32
- # @example Create a time64 data type with {Arrow::TimeUnit}
32
+ # @example Create a time64 data type with Arrow::TimeUnit
33
33
  # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
34
  #
35
35
  # @example Create a time64 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be microsecond or nanosecond.
47
47
  #
48
- # @example Create a time64 data type with {Arrow::TimeUnit}
48
+ # @example Create a time64 data type with Arrow::TimeUnit
49
49
  # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
50
  #
51
51
  # @example Create a time64 data type with Symbol
data/lib/arrow/timestamp-data-type.rb CHANGED
@@ -27,7 +27,7 @@ module Arrow
27
27
  # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
28
  # timestamp data type.
29
29
  #
30
- # @example Create a timestamp data type with {Arrow::TimeUnit}
30
+ # @example Create a timestamp data type with Arrow::TimeUnit
31
31
  # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
32
  #
33
33
  # @example Create a timestamp data type with Symbol
@@ -41,7 +41,7 @@ module Arrow
41
41
  # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
42
  # the timestamp data type.
43
43
  #
44
- # @example Create a timestamp data type with {Arrow::TimeUnit}
44
+ # @example Create a timestamp data type with Arrow::TimeUnit
45
45
  # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
46
  #
47
47
  # @example Create a timestamp data type with Symbol
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.17.1"
19
+ VERSION = "4.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
+ spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
51
  spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
52
  spec.add_runtime_dependency("native-package-installer")
data/test/raw-records/test-basic-arrays.rb CHANGED
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
329
329
  records)
330
330
  assert_equal(records, target.raw_records)
331
331
  end
332
+
333
+ def test_decimal256
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ target = build({
340
+ column: {
341
+ type: :decimal256,
342
+ precision: 38,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, target.raw_records)
348
+ end
332
349
  end
333
350
 
334
351
  class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
data/test/raw-records/test-dense-union-array.rb CHANGED
@@ -56,10 +56,7 @@ module RawRecordsDenseUnionArrayTests
56
56
  end
57
57
  records.each do |record|
58
58
  column = record[0]
59
- if column.nil?
60
- type_ids << nil
61
- offsets << 0
62
- elsif column.key?("0")
59
+ if column.key?("0")
63
60
  type_id = type_codes[0]
64
61
  type_ids << type_id
65
62
  offsets << (type_ids.count(type_id) - 1)
@@ -82,7 +79,6 @@ module RawRecordsDenseUnionArrayTests
82
79
  def test_null
83
80
  records = [
84
81
  [{"0" => nil}],
85
- [nil],
86
82
  ]
87
83
  target = build(:null, records)
88
84
  assert_equal(records, target.raw_records)
@@ -91,7 +87,6 @@ module RawRecordsDenseUnionArrayTests
91
87
  def test_boolean
92
88
  records = [
93
89
  [{"0" => true}],
94
- [nil],
95
90
  [{"1" => nil}],
96
91
  ]
97
92
  target = build(:boolean, records)
@@ -101,7 +96,6 @@ module RawRecordsDenseUnionArrayTests
101
96
  def test_int8
102
97
  records = [
103
98
  [{"0" => -(2 ** 7)}],
104
- [nil],
105
99
  [{"1" => nil}],
106
100
  ]
107
101
  target = build(:int8, records)
@@ -111,7 +105,6 @@ module RawRecordsDenseUnionArrayTests
111
105
  def test_uint8
112
106
  records = [
113
107
  [{"0" => (2 ** 8) - 1}],
114
- [nil],
115
108
  [{"1" => nil}],
116
109
  ]
117
110
  target = build(:uint8, records)
@@ -121,7 +114,6 @@ module RawRecordsDenseUnionArrayTests
121
114
  def test_int16
122
115
  records = [
123
116
  [{"0" => -(2 ** 15)}],
124
- [nil],
125
117
  [{"1" => nil}],
126
118
  ]
127
119
  target = build(:int16, records)
@@ -131,7 +123,6 @@ module RawRecordsDenseUnionArrayTests
131
123
  def test_uint16
132
124
  records = [
133
125
  [{"0" => (2 ** 16) - 1}],
134
- [nil],
135
126
  [{"1" => nil}],
136
127
  ]
137
128
  target = build(:uint16, records)
@@ -141,7 +132,6 @@ module RawRecordsDenseUnionArrayTests
141
132
  def test_int32
142
133
  records = [
143
134
  [{"0" => -(2 ** 31)}],
144
- [nil],
145
135
  [{"1" => nil}],
146
136
  ]
147
137
  target = build(:int32, records)
@@ -151,7 +141,6 @@ module RawRecordsDenseUnionArrayTests
151
141
  def test_uint32
152
142
  records = [
153
143
  [{"0" => (2 ** 32) - 1}],
154
- [nil],
155
144
  [{"1" => nil}],
156
145
  ]
157
146
  target = build(:uint32, records)
@@ -161,7 +150,6 @@ module RawRecordsDenseUnionArrayTests
161
150
  def test_int64
162
151
  records = [
163
152
  [{"0" => -(2 ** 63)}],
164
- [nil],
165
153
  [{"1" => nil}],
166
154
  ]
167
155
  target = build(:int64, records)
@@ -171,7 +159,6 @@ module RawRecordsDenseUnionArrayTests
171
159
  def test_uint64
172
160
  records = [
173
161
  [{"0" => (2 ** 64) - 1}],
174
- [nil],
175
162
  [{"1" => nil}],
176
163
  ]
177
164
  target = build(:uint64, records)
@@ -181,7 +168,6 @@ module RawRecordsDenseUnionArrayTests
181
168
  def test_float
182
169
  records = [
183
170
  [{"0" => -1.0}],
184
- [nil],
185
171
  [{"1" => nil}],
186
172
  ]
187
173
  target = build(:float, records)
@@ -191,7 +177,6 @@ module RawRecordsDenseUnionArrayTests
191
177
  def test_double
192
178
  records = [
193
179
  [{"0" => -1.0}],
194
- [nil],
195
180
  [{"1" => nil}],
196
181
  ]
197
182
  target = build(:double, records)
@@ -201,7 +186,6 @@ module RawRecordsDenseUnionArrayTests
201
186
  def test_binary
202
187
  records = [
203
188
  [{"0" => "\xff".b}],
204
- [nil],
205
189
  [{"1" => nil}],
206
190
  ]
207
191
  target = build(:binary, records)
@@ -211,7 +195,6 @@ module RawRecordsDenseUnionArrayTests
211
195
  def test_string
212
196
  records = [
213
197
  [{"0" => "Ruby"}],
214
- [nil],
215
198
  [{"1" => nil}],
216
199
  ]
217
200
  target = build(:string, records)
@@ -221,7 +204,6 @@ module RawRecordsDenseUnionArrayTests
221
204
  def test_date32
222
205
  records = [
223
206
  [{"0" => Date.new(1960, 1, 1)}],
224
- [nil],
225
207
  [{"1" => nil}],
226
208
  ]
227
209
  target = build(:date32, records)
@@ -231,7 +213,6 @@ module RawRecordsDenseUnionArrayTests
231
213
  def test_date64
232
214
  records = [
233
215
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
234
- [nil],
235
216
  [{"1" => nil}],
236
217
  ]
237
218
  target = build(:date64, records)
@@ -241,7 +222,6 @@ module RawRecordsDenseUnionArrayTests
241
222
  def test_timestamp_second
242
223
  records = [
243
224
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
244
- [nil],
245
225
  [{"1" => nil}],
246
226
  ]
247
227
  target = build({
@@ -255,7 +235,6 @@ module RawRecordsDenseUnionArrayTests
255
235
  def test_timestamp_milli
256
236
  records = [
257
237
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
258
- [nil],
259
238
  [{"1" => nil}],
260
239
  ]
261
240
  target = build({
@@ -269,7 +248,6 @@ module RawRecordsDenseUnionArrayTests
269
248
  def test_timestamp_micro
270
249
  records = [
271
250
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
272
- [nil],
273
251
  [{"1" => nil}],
274
252
  ]
275
253
  target = build({
@@ -283,7 +261,6 @@ module RawRecordsDenseUnionArrayTests
283
261
  def test_timestamp_nano
284
262
  records = [
285
263
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
286
- [nil],
287
264
  [{"1" => nil}],
288
265
  ]
289
266
  target = build({
@@ -299,7 +276,6 @@ module RawRecordsDenseUnionArrayTests
299
276
  records = [
300
277
  # 00:10:00
301
278
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
302
- [nil],
303
279
  [{"1" => nil}],
304
280
  ]
305
281
  target = build({
@@ -315,7 +291,6 @@ module RawRecordsDenseUnionArrayTests
315
291
  records = [
316
292
  # 00:10:00.123
317
293
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
318
- [nil],
319
294
  [{"1" => nil}],
320
295
  ]
321
296
  target = build({
@@ -331,7 +306,6 @@ module RawRecordsDenseUnionArrayTests
331
306
  records = [
332
307
  # 00:10:00.123456
333
308
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
334
- [nil],
335
309
  [{"1" => nil}],
336
310
  ]
337
311
  target = build({
@@ -347,7 +321,6 @@ module RawRecordsDenseUnionArrayTests
347
321
  records = [
348
322
  # 00:10:00.123456789
349
323
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
350
- [nil],
351
324
  [{"1" => nil}],
352
325
  ]
353
326
  target = build({
@@ -361,7 +334,6 @@ module RawRecordsDenseUnionArrayTests
361
334
  def test_decimal128
362
335
  records = [
363
336
  [{"0" => BigDecimal("92.92")}],
364
- [nil],
365
337
  [{"1" => nil}],
366
338
  ]
367
339
  target = build({
@@ -373,10 +345,23 @@ module RawRecordsDenseUnionArrayTests
373
345
  assert_equal(records, target.raw_records)
374
346
  end
375
347
 
348
+ def test_decimal256
349
+ records = [
350
+ [{"0" => BigDecimal("92.92")}],
351
+ [{"1" => nil}],
352
+ ]
353
+ target = build({
354
+ type: :decimal256,
355
+ precision: 38,
356
+ scale: 2,
357
+ },
358
+ records)
359
+ assert_equal(records, target.raw_records)
360
+ end
361
+
376
362
  def test_list
377
363
  records = [
378
364
  [{"0" => [true, nil, false]}],
379
- [nil],
380
365
  [{"1" => nil}],
381
366
  ]
382
367
  target = build({
@@ -393,7 +378,6 @@ module RawRecordsDenseUnionArrayTests
393
378
  def test_struct
394
379
  records = [
395
380
  [{"0" => {"sub_field" => true}}],
396
- [nil],
397
381
  [{"1" => nil}],
398
382
  [{"0" => {"sub_field" => nil}}],
399
383
  ]
@@ -414,7 +398,6 @@ module RawRecordsDenseUnionArrayTests
414
398
  omit("Need to add support for SparseUnionArrayBuilder")
415
399
  records = [
416
400
  [{"0" => {"field1" => true}}],
417
- [nil],
418
401
  [{"1" => nil}],
419
402
  [{"0" => {"field2" => nil}}],
420
403
  ]
@@ -440,7 +423,6 @@ module RawRecordsDenseUnionArrayTests
440
423
  omit("Need to add support for DenseUnionArrayBuilder")
441
424
  records = [
442
425
  [{"0" => {"field1" => true}}],
443
- [nil],
444
426
  [{"1" => nil}],
445
427
  [{"0" => {"field2" => nil}}],
446
428
  ]
@@ -466,7 +448,6 @@ module RawRecordsDenseUnionArrayTests
466
448
  omit("Need to add support for DictionaryArrayBuilder")
467
449
  records = [
468
450
  [{"0" => "Ruby"}],
469
- [nil],
470
451
  [{"1" => nil}],
471
452
  [{"0" => "GLib"}],
472
453
  ]