red-arrow 0.17.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/buffer.rb +28 -0
  10. data/lib/arrow/data-type.rb +14 -5
  11. data/lib/arrow/decimal128-array-builder.rb +21 -25
  12. data/lib/arrow/decimal128-data-type.rb +2 -0
  13. data/lib/arrow/decimal128.rb +18 -0
  14. data/lib/arrow/decimal256-array-builder.rb +61 -0
  15. data/lib/arrow/decimal256-array.rb +25 -0
  16. data/lib/arrow/decimal256-data-type.rb +73 -0
  17. data/lib/arrow/decimal256.rb +60 -0
  18. data/lib/arrow/dense-union-data-type.rb +2 -2
  19. data/lib/arrow/dictionary-array.rb +24 -0
  20. data/lib/arrow/dictionary-data-type.rb +2 -2
  21. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  22. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  23. data/lib/arrow/loader.rb +18 -0
  24. data/lib/arrow/raw-table-converter.rb +47 -0
  25. data/lib/arrow/record-batch-iterator.rb +22 -0
  26. data/lib/arrow/record-batch.rb +9 -1
  27. data/lib/arrow/sort-key.rb +193 -0
  28. data/lib/arrow/sort-options.rb +109 -0
  29. data/lib/arrow/sparse-union-data-type.rb +2 -2
  30. data/lib/arrow/struct-array-builder.rb +13 -7
  31. data/lib/arrow/table-saver.rb +6 -6
  32. data/lib/arrow/table.rb +5 -24
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +1 -0
  38. data/test/raw-records/test-basic-arrays.rb +17 -0
  39. data/test/raw-records/test-dense-union-array.rb +15 -34
  40. data/test/raw-records/test-list-array.rb +20 -0
  41. data/test/raw-records/test-sparse-union-array.rb +15 -33
  42. data/test/raw-records/test-struct-array.rb +15 -0
  43. data/test/test-array.rb +122 -2
  44. data/test/test-bigdecimal.rb +20 -3
  45. data/test/test-buffer.rb +11 -0
  46. data/test/test-decimal128-array-builder.rb +18 -1
  47. data/test/test-decimal128-data-type.rb +2 -2
  48. data/test/test-decimal128.rb +38 -0
  49. data/test/test-decimal256-array-builder.rb +112 -0
  50. data/test/test-decimal256-array.rb +38 -0
  51. data/test/test-decimal256-data-type.rb +31 -0
  52. data/test/test-decimal256.rb +102 -0
  53. data/test/test-dense-union-data-type.rb +2 -2
  54. data/test/test-dictionary-array.rb +41 -0
  55. data/test/test-feather.rb +1 -1
  56. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  57. data/test/test-fixed-size-binary-array.rb +36 -0
  58. data/test/test-orc.rb +19 -23
  59. data/test/test-record-batch-iterator.rb +37 -0
  60. data/test/test-record-batch.rb +14 -0
  61. data/test/test-sort-indices.rb +40 -0
  62. data/test/test-sort-key.rb +81 -0
  63. data/test/test-sort-options.rb +58 -0
  64. data/test/test-sparse-union-data-type.rb +2 -2
  65. data/test/test-struct-array-builder.rb +16 -12
  66. data/test/test-struct-array.rb +2 -2
  67. data/test/values/test-basic-arrays.rb +11 -0
  68. data/test/values/test-dense-union-array.rb +15 -34
  69. data/test/values/test-list-array.rb +18 -0
  70. data/test/values/test-sparse-union-array.rb +15 -33
  71. data/test/values/test-struct-array.rb +15 -0
  72. metadata +107 -59
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicates
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicates corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end
@@ -155,13 +155,13 @@ module Arrow
155
155
  end
156
156
 
157
157
  def save_as_feather
158
+ properties = FeatherWriteProperties.new
159
+ properties.class.properties.each do |name|
160
+ value = @options[name.to_sym]
161
+ next if value.nil?
162
+ properties.__send__("#{name}=", value)
163
+ end
158
164
  open_raw_output_stream do |output|
159
- properties = FeatherWriteProperties.new
160
- properties.class.properties.each do |name|
161
- value = @options[name.to_sym]
162
- next if value.nil?
163
- properties.__send__("#{name}=", value)
164
- end
165
165
  @table.write_as_feather(output, properties)
166
166
  end
167
167
  end
data/lib/arrow/table.rb CHANGED
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class Table
20
22
  include ColumnContainable
@@ -81,14 +83,6 @@ module Arrow
81
83
  # `Array`.
82
84
  #
83
85
  # @example Create a table from column name and values
84
- # count_chunks = [
85
- # Arrow::UInt32Array.new([0, 2]),
86
- # Arrow::UInt32Array.new([nil, 4]),
87
- # ]
88
- # visible_chunks = [
89
- # Arrow::BooleanArray.new([true]),
90
- # Arrow::BooleanArray.new([nil, nil, false]),
91
- # ]
92
86
  # Arrow::Table.new("count" => [0, 2, nil, 4],
93
87
  # "visible" => [true, nil, nil, false])
94
88
  #
@@ -169,22 +163,9 @@ module Arrow
169
163
  n_args = args.size
170
164
  case n_args
171
165
  when 1
172
- if args[0][0].is_a?(Column)
173
- columns = args[0]
174
- fields = columns.collect(&:field)
175
- values = columns.collect(&:data)
176
- schema = Schema.new(fields)
177
- else
178
- raw_table = args[0]
179
- fields = []
180
- values = []
181
- raw_table.each do |name, array|
182
- array = ArrayBuilder.build(array) if array.is_a?(::Array)
183
- fields << Field.new(name.to_s, array.value_data_type)
184
- values << array
185
- end
186
- schema = Schema.new(fields)
187
- end
166
+ raw_table_converter = RawTableConverter.new(args[0])
167
+ schema = raw_table_converter.schema
168
+ values = raw_table_converter.values
188
169
  when 2
189
170
  schema = args[0]
190
171
  schema = Schema.new(schema) unless schema.is_a?(Schema)
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be microsecond or nanosecond.
31
31
  #
32
- # @example Create a time64 data type with {Arrow::TimeUnit}
32
+ # @example Create a time64 data type with Arrow::TimeUnit
33
33
  # Arrow::Time64DataType.new(Arrow::TimeUnit::NANO)
34
34
  #
35
35
  # @example Create a time64 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be microsecond or nanosecond.
47
47
  #
48
- # @example Create a time64 data type with {Arrow::TimeUnit}
48
+ # @example Create a time64 data type with Arrow::TimeUnit
49
49
  # Arrow::Time64DataType.new(unit: Arrow::TimeUnit::NANO)
50
50
  #
51
51
  # @example Create a time64 data type with Symbol
@@ -27,7 +27,7 @@ module Arrow
27
27
  # @param unit [Arrow::TimeUnit, Symbol] The unit of the
28
28
  # timestamp data type.
29
29
  #
30
- # @example Create a timestamp data type with {Arrow::TimeUnit}
30
+ # @example Create a timestamp data type with Arrow::TimeUnit
31
31
  # Arrow::TimestampDataType.new(Arrow::TimeUnit::MILLI)
32
32
  #
33
33
  # @example Create a timestamp data type with Symbol
@@ -41,7 +41,7 @@ module Arrow
41
41
  # @option description [Arrow::TimeUnit, Symbol] :unit The unit of
42
42
  # the timestamp data type.
43
43
  #
44
- # @example Create a timestamp data type with {Arrow::TimeUnit}
44
+ # @example Create a timestamp data type with Arrow::TimeUnit
45
45
  # Arrow::TimestampDataType.new(unit: Arrow::TimeUnit::MILLI)
46
46
  #
47
47
  # @example Create a timestamp data type with Symbol
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.17.1"
19
+ VERSION = "4.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -46,6 +46,7 @@ Gem::Specification.new do |spec|
46
46
  spec.test_files += Dir.glob("test/**/*")
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
+ spec.add_runtime_dependency("bigdecimal", ">= 2.0.3")
49
50
  spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
51
  spec.add_runtime_dependency("gio2", ">= 3.3.6")
51
52
  spec.add_runtime_dependency("native-package-installer")
@@ -329,6 +329,23 @@ module RawRecordsBasicArraysTests
329
329
  records)
330
330
  assert_equal(records, target.raw_records)
331
331
  end
332
+
333
+ def test_decimal256
334
+ records = [
335
+ [BigDecimal("92.92")],
336
+ [nil],
337
+ [BigDecimal("29.29")],
338
+ ]
339
+ target = build({
340
+ column: {
341
+ type: :decimal256,
342
+ precision: 38,
343
+ scale: 2,
344
+ }
345
+ },
346
+ records)
347
+ assert_equal(records, target.raw_records)
348
+ end
332
349
  end
333
350
 
334
351
  class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
@@ -56,10 +56,7 @@ module RawRecordsDenseUnionArrayTests
56
56
  end
57
57
  records.each do |record|
58
58
  column = record[0]
59
- if column.nil?
60
- type_ids << nil
61
- offsets << 0
62
- elsif column.key?("0")
59
+ if column.key?("0")
63
60
  type_id = type_codes[0]
64
61
  type_ids << type_id
65
62
  offsets << (type_ids.count(type_id) - 1)
@@ -82,7 +79,6 @@ module RawRecordsDenseUnionArrayTests
82
79
  def test_null
83
80
  records = [
84
81
  [{"0" => nil}],
85
- [nil],
86
82
  ]
87
83
  target = build(:null, records)
88
84
  assert_equal(records, target.raw_records)
@@ -91,7 +87,6 @@ module RawRecordsDenseUnionArrayTests
91
87
  def test_boolean
92
88
  records = [
93
89
  [{"0" => true}],
94
- [nil],
95
90
  [{"1" => nil}],
96
91
  ]
97
92
  target = build(:boolean, records)
@@ -101,7 +96,6 @@ module RawRecordsDenseUnionArrayTests
101
96
  def test_int8
102
97
  records = [
103
98
  [{"0" => -(2 ** 7)}],
104
- [nil],
105
99
  [{"1" => nil}],
106
100
  ]
107
101
  target = build(:int8, records)
@@ -111,7 +105,6 @@ module RawRecordsDenseUnionArrayTests
111
105
  def test_uint8
112
106
  records = [
113
107
  [{"0" => (2 ** 8) - 1}],
114
- [nil],
115
108
  [{"1" => nil}],
116
109
  ]
117
110
  target = build(:uint8, records)
@@ -121,7 +114,6 @@ module RawRecordsDenseUnionArrayTests
121
114
  def test_int16
122
115
  records = [
123
116
  [{"0" => -(2 ** 15)}],
124
- [nil],
125
117
  [{"1" => nil}],
126
118
  ]
127
119
  target = build(:int16, records)
@@ -131,7 +123,6 @@ module RawRecordsDenseUnionArrayTests
131
123
  def test_uint16
132
124
  records = [
133
125
  [{"0" => (2 ** 16) - 1}],
134
- [nil],
135
126
  [{"1" => nil}],
136
127
  ]
137
128
  target = build(:uint16, records)
@@ -141,7 +132,6 @@ module RawRecordsDenseUnionArrayTests
141
132
  def test_int32
142
133
  records = [
143
134
  [{"0" => -(2 ** 31)}],
144
- [nil],
145
135
  [{"1" => nil}],
146
136
  ]
147
137
  target = build(:int32, records)
@@ -151,7 +141,6 @@ module RawRecordsDenseUnionArrayTests
151
141
  def test_uint32
152
142
  records = [
153
143
  [{"0" => (2 ** 32) - 1}],
154
- [nil],
155
144
  [{"1" => nil}],
156
145
  ]
157
146
  target = build(:uint32, records)
@@ -161,7 +150,6 @@ module RawRecordsDenseUnionArrayTests
161
150
  def test_int64
162
151
  records = [
163
152
  [{"0" => -(2 ** 63)}],
164
- [nil],
165
153
  [{"1" => nil}],
166
154
  ]
167
155
  target = build(:int64, records)
@@ -171,7 +159,6 @@ module RawRecordsDenseUnionArrayTests
171
159
  def test_uint64
172
160
  records = [
173
161
  [{"0" => (2 ** 64) - 1}],
174
- [nil],
175
162
  [{"1" => nil}],
176
163
  ]
177
164
  target = build(:uint64, records)
@@ -181,7 +168,6 @@ module RawRecordsDenseUnionArrayTests
181
168
  def test_float
182
169
  records = [
183
170
  [{"0" => -1.0}],
184
- [nil],
185
171
  [{"1" => nil}],
186
172
  ]
187
173
  target = build(:float, records)
@@ -191,7 +177,6 @@ module RawRecordsDenseUnionArrayTests
191
177
  def test_double
192
178
  records = [
193
179
  [{"0" => -1.0}],
194
- [nil],
195
180
  [{"1" => nil}],
196
181
  ]
197
182
  target = build(:double, records)
@@ -201,7 +186,6 @@ module RawRecordsDenseUnionArrayTests
201
186
  def test_binary
202
187
  records = [
203
188
  [{"0" => "\xff".b}],
204
- [nil],
205
189
  [{"1" => nil}],
206
190
  ]
207
191
  target = build(:binary, records)
@@ -211,7 +195,6 @@ module RawRecordsDenseUnionArrayTests
211
195
  def test_string
212
196
  records = [
213
197
  [{"0" => "Ruby"}],
214
- [nil],
215
198
  [{"1" => nil}],
216
199
  ]
217
200
  target = build(:string, records)
@@ -221,7 +204,6 @@ module RawRecordsDenseUnionArrayTests
221
204
  def test_date32
222
205
  records = [
223
206
  [{"0" => Date.new(1960, 1, 1)}],
224
- [nil],
225
207
  [{"1" => nil}],
226
208
  ]
227
209
  target = build(:date32, records)
@@ -231,7 +213,6 @@ module RawRecordsDenseUnionArrayTests
231
213
  def test_date64
232
214
  records = [
233
215
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
234
- [nil],
235
216
  [{"1" => nil}],
236
217
  ]
237
218
  target = build(:date64, records)
@@ -241,7 +222,6 @@ module RawRecordsDenseUnionArrayTests
241
222
  def test_timestamp_second
242
223
  records = [
243
224
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
244
- [nil],
245
225
  [{"1" => nil}],
246
226
  ]
247
227
  target = build({
@@ -255,7 +235,6 @@ module RawRecordsDenseUnionArrayTests
255
235
  def test_timestamp_milli
256
236
  records = [
257
237
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
258
- [nil],
259
238
  [{"1" => nil}],
260
239
  ]
261
240
  target = build({
@@ -269,7 +248,6 @@ module RawRecordsDenseUnionArrayTests
269
248
  def test_timestamp_micro
270
249
  records = [
271
250
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
272
- [nil],
273
251
  [{"1" => nil}],
274
252
  ]
275
253
  target = build({
@@ -283,7 +261,6 @@ module RawRecordsDenseUnionArrayTests
283
261
  def test_timestamp_nano
284
262
  records = [
285
263
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
286
- [nil],
287
264
  [{"1" => nil}],
288
265
  ]
289
266
  target = build({
@@ -299,7 +276,6 @@ module RawRecordsDenseUnionArrayTests
299
276
  records = [
300
277
  # 00:10:00
301
278
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
302
- [nil],
303
279
  [{"1" => nil}],
304
280
  ]
305
281
  target = build({
@@ -315,7 +291,6 @@ module RawRecordsDenseUnionArrayTests
315
291
  records = [
316
292
  # 00:10:00.123
317
293
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
318
- [nil],
319
294
  [{"1" => nil}],
320
295
  ]
321
296
  target = build({
@@ -331,7 +306,6 @@ module RawRecordsDenseUnionArrayTests
331
306
  records = [
332
307
  # 00:10:00.123456
333
308
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
334
- [nil],
335
309
  [{"1" => nil}],
336
310
  ]
337
311
  target = build({
@@ -347,7 +321,6 @@ module RawRecordsDenseUnionArrayTests
347
321
  records = [
348
322
  # 00:10:00.123456789
349
323
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
350
- [nil],
351
324
  [{"1" => nil}],
352
325
  ]
353
326
  target = build({
@@ -361,7 +334,6 @@ module RawRecordsDenseUnionArrayTests
361
334
  def test_decimal128
362
335
  records = [
363
336
  [{"0" => BigDecimal("92.92")}],
364
- [nil],
365
337
  [{"1" => nil}],
366
338
  ]
367
339
  target = build({
@@ -373,10 +345,23 @@ module RawRecordsDenseUnionArrayTests
373
345
  assert_equal(records, target.raw_records)
374
346
  end
375
347
 
348
+ def test_decimal256
349
+ records = [
350
+ [{"0" => BigDecimal("92.92")}],
351
+ [{"1" => nil}],
352
+ ]
353
+ target = build({
354
+ type: :decimal256,
355
+ precision: 38,
356
+ scale: 2,
357
+ },
358
+ records)
359
+ assert_equal(records, target.raw_records)
360
+ end
361
+
376
362
  def test_list
377
363
  records = [
378
364
  [{"0" => [true, nil, false]}],
379
- [nil],
380
365
  [{"1" => nil}],
381
366
  ]
382
367
  target = build({
@@ -393,7 +378,6 @@ module RawRecordsDenseUnionArrayTests
393
378
  def test_struct
394
379
  records = [
395
380
  [{"0" => {"sub_field" => true}}],
396
- [nil],
397
381
  [{"1" => nil}],
398
382
  [{"0" => {"sub_field" => nil}}],
399
383
  ]
@@ -414,7 +398,6 @@ module RawRecordsDenseUnionArrayTests
414
398
  omit("Need to add support for SparseUnionArrayBuilder")
415
399
  records = [
416
400
  [{"0" => {"field1" => true}}],
417
- [nil],
418
401
  [{"1" => nil}],
419
402
  [{"0" => {"field2" => nil}}],
420
403
  ]
@@ -440,7 +423,6 @@ module RawRecordsDenseUnionArrayTests
440
423
  omit("Need to add support for DenseUnionArrayBuilder")
441
424
  records = [
442
425
  [{"0" => {"field1" => true}}],
443
- [nil],
444
426
  [{"1" => nil}],
445
427
  [{"0" => {"field2" => nil}}],
446
428
  ]
@@ -466,7 +448,6 @@ module RawRecordsDenseUnionArrayTests
466
448
  omit("Need to add support for DictionaryArrayBuilder")
467
449
  records = [
468
450
  [{"0" => "Ruby"}],
469
- [nil],
470
451
  [{"1" => nil}],
471
452
  [{"0" => "GLib"}],
472
453
  ]