red-arrow 12.0.1 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,529 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module EachRawRecordStructArrayTests
19
+ def build_schema(type)
20
+ field_description = {
21
+ name: :field,
22
+ }
23
+ if type.is_a?(Hash)
24
+ field_description = field_description.merge(type)
25
+ else
26
+ field_description[:type] = type
27
+ end
28
+ {
29
+ column: {
30
+ type: :struct,
31
+ fields: [
32
+ field_description,
33
+ ],
34
+ },
35
+ }
36
+ end
37
+
38
+ def test_null
39
+ records = [
40
+ [{"field" => nil}],
41
+ [nil],
42
+ ]
43
+ target = build(:null, records)
44
+ assert_equal(records, target.each_raw_record.to_a)
45
+ end
46
+
47
+ def test_boolean
48
+ records = [
49
+ [{"field" => true}],
50
+ [nil],
51
+ [{"field" => nil}],
52
+ ]
53
+ target = build(:boolean, records)
54
+ assert_equal(records, target.each_raw_record.to_a)
55
+ end
56
+
57
+ def test_int8
58
+ records = [
59
+ [{"field" => -(2 ** 7)}],
60
+ [nil],
61
+ [{"field" => nil}],
62
+ ]
63
+ target = build(:int8, records)
64
+ assert_equal(records, target.each_raw_record.to_a)
65
+ end
66
+
67
+ def test_uint8
68
+ records = [
69
+ [{"field" => (2 ** 8) - 1}],
70
+ [nil],
71
+ [{"field" => nil}],
72
+ ]
73
+ target = build(:uint8, records)
74
+ assert_equal(records, target.each_raw_record.to_a)
75
+ end
76
+
77
+ def test_int16
78
+ records = [
79
+ [{"field" => -(2 ** 15)}],
80
+ [nil],
81
+ [{"field" => nil}],
82
+ ]
83
+ target = build(:int16, records)
84
+ assert_equal(records, target.each_raw_record.to_a)
85
+ end
86
+
87
+ def test_uint16
88
+ records = [
89
+ [{"field" => (2 ** 16) - 1}],
90
+ [nil],
91
+ [{"field" => nil}],
92
+ ]
93
+ target = build(:uint16, records)
94
+ assert_equal(records, target.each_raw_record.to_a)
95
+ end
96
+
97
+ def test_int32
98
+ records = [
99
+ [{"field" => -(2 ** 31)}],
100
+ [nil],
101
+ [{"field" => nil}],
102
+ ]
103
+ target = build(:int32, records)
104
+ assert_equal(records, target.each_raw_record.to_a)
105
+ end
106
+
107
+ def test_uint32
108
+ records = [
109
+ [{"field" => (2 ** 32) - 1}],
110
+ [nil],
111
+ [{"field" => nil}],
112
+ ]
113
+ target = build(:uint32, records)
114
+ assert_equal(records, target.each_raw_record.to_a)
115
+ end
116
+
117
+ def test_int64
118
+ records = [
119
+ [{"field" => -(2 ** 63)}],
120
+ [nil],
121
+ [{"field" => nil}],
122
+ ]
123
+ target = build(:int64, records)
124
+ assert_equal(records, target.each_raw_record.to_a)
125
+ end
126
+
127
+ def test_uint64
128
+ records = [
129
+ [{"field" => (2 ** 64) - 1}],
130
+ [nil],
131
+ [{"field" => nil}],
132
+ ]
133
+ target = build(:uint64, records)
134
+ assert_equal(records, target.each_raw_record.to_a)
135
+ end
136
+
137
+ def test_float
138
+ records = [
139
+ [{"field" => -1.0}],
140
+ [nil],
141
+ [{"field" => nil}],
142
+ ]
143
+ target = build(:float, records)
144
+ assert_equal(records, target.each_raw_record.to_a)
145
+ end
146
+
147
+ def test_double
148
+ records = [
149
+ [{"field" => -1.0}],
150
+ [nil],
151
+ [{"field" => nil}],
152
+ ]
153
+ target = build(:double, records)
154
+ assert_equal(records, target.each_raw_record.to_a)
155
+ end
156
+
157
+ def test_binary
158
+ records = [
159
+ [{"field" => "\xff".b}],
160
+ [nil],
161
+ [{"field" => nil}],
162
+ ]
163
+ target = build(:binary, records)
164
+ assert_equal(records, target.each_raw_record.to_a)
165
+ end
166
+
167
+ def test_string
168
+ records = [
169
+ [{"field" => "Ruby"}],
170
+ [nil],
171
+ [{"field" => nil}],
172
+ ]
173
+ target = build(:string, records)
174
+ assert_equal(records, target.each_raw_record.to_a)
175
+ end
176
+
177
+ def test_date32
178
+ records = [
179
+ [{"field" => Date.new(1960, 1, 1)}],
180
+ [nil],
181
+ [{"field" => nil}],
182
+ ]
183
+ target = build(:date32, records)
184
+ assert_equal(records, target.each_raw_record.to_a)
185
+ end
186
+
187
+ def test_date64
188
+ records = [
189
+ [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
190
+ [nil],
191
+ [{"field" => nil}],
192
+ ]
193
+ target = build(:date64, records)
194
+ assert_equal(records, target.each_raw_record.to_a)
195
+ end
196
+
197
+ def test_timestamp_second
198
+ records = [
199
+ [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
200
+ [nil],
201
+ [{"field" => nil}],
202
+ ]
203
+ target = build({
204
+ type: :timestamp,
205
+ unit: :second,
206
+ },
207
+ records)
208
+ assert_equal(records, target.each_raw_record.to_a)
209
+ end
210
+
211
+ def test_timestamp_milli
212
+ records = [
213
+ [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
214
+ [nil],
215
+ [{"field" => nil}],
216
+ ]
217
+ target = build({
218
+ type: :timestamp,
219
+ unit: :milli,
220
+ },
221
+ records)
222
+ assert_equal(records, target.each_raw_record.to_a)
223
+ end
224
+
225
+ def test_timestamp_micro
226
+ records = [
227
+ [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
228
+ [nil],
229
+ [{"field" => nil}],
230
+ ]
231
+ target = build({
232
+ type: :timestamp,
233
+ unit: :micro,
234
+ },
235
+ records)
236
+ assert_equal(records, target.each_raw_record.to_a)
237
+ end
238
+
239
+ def test_timestamp_nano
240
+ records = [
241
+ [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
242
+ [nil],
243
+ [{"field" => nil}],
244
+ ]
245
+ target = build({
246
+ type: :timestamp,
247
+ unit: :nano,
248
+ },
249
+ records)
250
+ assert_equal(records, target.each_raw_record.to_a)
251
+ end
252
+
253
+ def test_time32_second
254
+ unit = Arrow::TimeUnit::SECOND
255
+ records = [
256
+ # 00:10:00
257
+ [{"field" => Arrow::Time.new(unit, 60 * 10)}],
258
+ [nil],
259
+ [{"field" => nil}],
260
+ ]
261
+ target = build({
262
+ type: :time32,
263
+ unit: :second,
264
+ },
265
+ records)
266
+ assert_equal(records, target.each_raw_record.to_a)
267
+ end
268
+
269
+ def test_time32_milli
270
+ unit = Arrow::TimeUnit::MILLI
271
+ records = [
272
+ # 00:10:00.123
273
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
274
+ [nil],
275
+ [{"field" => nil}],
276
+ ]
277
+ target = build({
278
+ type: :time32,
279
+ unit: :milli,
280
+ },
281
+ records)
282
+ assert_equal(records, target.each_raw_record.to_a)
283
+ end
284
+
285
+ def test_time64_micro
286
+ unit = Arrow::TimeUnit::MICRO
287
+ records = [
288
+ # 00:10:00.123456
289
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
290
+ [nil],
291
+ [{"field" => nil}],
292
+ ]
293
+ target = build({
294
+ type: :time64,
295
+ unit: :micro,
296
+ },
297
+ records)
298
+ assert_equal(records, target.each_raw_record.to_a)
299
+ end
300
+
301
+ def test_time64_nano
302
+ unit = Arrow::TimeUnit::NANO
303
+ records = [
304
+ # 00:10:00.123456789
305
+ [{"field" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
306
+ [nil],
307
+ [{"field" => nil}],
308
+ ]
309
+ target = build({
310
+ type: :time64,
311
+ unit: :nano,
312
+ },
313
+ records)
314
+ assert_equal(records, target.each_raw_record.to_a)
315
+ end
316
+
317
+ def test_decimal128
318
+ records = [
319
+ [{"field" => BigDecimal("92.92")}],
320
+ [nil],
321
+ [{"field" => nil}],
322
+ ]
323
+ target = build({
324
+ type: :decimal128,
325
+ precision: 8,
326
+ scale: 2,
327
+ },
328
+ records)
329
+ assert_equal(records, target.each_raw_record.to_a)
330
+ end
331
+
332
+ def test_decimal256
333
+ records = [
334
+ [{"field" => BigDecimal("92.92")}],
335
+ [nil],
336
+ [{"field" => nil}],
337
+ ]
338
+ target = build({
339
+ type: :decimal256,
340
+ precision: 38,
341
+ scale: 2,
342
+ },
343
+ records)
344
+ assert_equal(records, target.each_raw_record.to_a)
345
+ end
346
+
347
+ def test_month_interval
348
+ records = [
349
+ [{"field" => 1}],
350
+ [nil],
351
+ [{"field" => nil}],
352
+ ]
353
+ target = build(:month_interval, records)
354
+ assert_equal(records, target.each_raw_record.to_a)
355
+ end
356
+
357
+ def test_day_time_interval
358
+ records = [
359
+ [{"field" => {day: 1, millisecond: 100}}],
360
+ [nil],
361
+ [{"field" => nil}],
362
+ ]
363
+ target = build(:day_time_interval, records)
364
+ assert_equal(records, target.each_raw_record.to_a)
365
+ end
366
+
367
+ def test_month_day_nano_interval
368
+ records = [
369
+ [{"field" => {month: 1, day: 1, nanosecond: 100}}],
370
+ [nil],
371
+ [{"field" => nil}],
372
+ ]
373
+ target = build(:month_day_nano_interval, records)
374
+ assert_equal(records, target.each_raw_record.to_a)
375
+ end
376
+
377
+ def test_list
378
+ records = [
379
+ [{"field" => [true, nil, false]}],
380
+ [nil],
381
+ [{"field" => nil}],
382
+ ]
383
+ target = build({
384
+ type: :list,
385
+ field: {
386
+ name: :sub_element,
387
+ type: :boolean,
388
+ },
389
+ },
390
+ records)
391
+ assert_equal(records, target.each_raw_record.to_a)
392
+ end
393
+
394
+ def test_struct
395
+ records = [
396
+ [{"field" => {"sub_field" => true}}],
397
+ [nil],
398
+ [{"field" => nil}],
399
+ [{"field" => {"sub_field" => nil}}],
400
+ ]
401
+ target = build({
402
+ type: :struct,
403
+ fields: [
404
+ {
405
+ name: :sub_field,
406
+ type: :boolean,
407
+ },
408
+ ],
409
+ },
410
+ records)
411
+ assert_equal(records, target.each_raw_record.to_a)
412
+ end
413
+
414
+ def test_map
415
+ records = [
416
+ [{"field" => {"key1" => true, "key2" => nil}}],
417
+ [nil],
418
+ [{"field" => nil}],
419
+ ]
420
+ target = build({
421
+ type: :map,
422
+ key: :string,
423
+ item: :boolean,
424
+ },
425
+ records)
426
+ assert_equal(records, target.each_raw_record.to_a)
427
+ end
428
+
429
+ def remove_union_field_names(records)
430
+ records.collect do |record|
431
+ record.collect do |column|
432
+ if column.nil?
433
+ column
434
+ else
435
+ value = column["field"]
436
+ value = value.values[0] unless value.nil?
437
+ {"field" => value}
438
+ end
439
+ end
440
+ end
441
+ end
442
+
443
+ def test_sparse_union
444
+ records = [
445
+ [{"field" => {"field1" => true}}],
446
+ [nil],
447
+ [{"field" => nil}],
448
+ [{"field" => {"field2" => 29}}],
449
+ [{"field" => {"field2" => nil}}],
450
+ ]
451
+ target = build({
452
+ type: :sparse_union,
453
+ fields: [
454
+ {
455
+ name: :field1,
456
+ type: :boolean,
457
+ },
458
+ {
459
+ name: :field2,
460
+ type: :uint8,
461
+ },
462
+ ],
463
+ type_codes: [0, 1],
464
+ },
465
+ records)
466
+ assert_equal(remove_union_field_names(records),
467
+ target.each_raw_record.to_a)
468
+ end
469
+
470
+ def test_dense_union
471
+ records = [
472
+ [{"field" => {"field1" => true}}],
473
+ [nil],
474
+ [{"field" => nil}],
475
+ [{"field" => {"field2" => 29}}],
476
+ [{"field" => {"field2" => nil}}],
477
+ ]
478
+ target = build({
479
+ type: :dense_union,
480
+ fields: [
481
+ {
482
+ name: :field1,
483
+ type: :boolean,
484
+ },
485
+ {
486
+ name: :field2,
487
+ type: :uint8,
488
+ },
489
+ ],
490
+ type_codes: [0, 1],
491
+ },
492
+ records)
493
+ assert_equal(remove_union_field_names(records),
494
+ target.each_raw_record.to_a)
495
+ end
496
+
497
+ def test_dictionary
498
+ records = [
499
+ [{"field" => "Ruby"}],
500
+ [nil],
501
+ [{"field" => nil}],
502
+ [{"field" => "GLib"}],
503
+ ]
504
+ target = build({
505
+ type: :dictionary,
506
+ index_data_type: :int8,
507
+ value_data_type: :string,
508
+ ordered: false,
509
+ },
510
+ records)
511
+ assert_equal(records, target.each_raw_record.to_a)
512
+ end
513
+ end
514
+
515
+ class EachRawRecordRecordBatchStructArrayTest < Test::Unit::TestCase
516
+ include EachRawRecordStructArrayTests
517
+
518
+ def build(type, records)
519
+ Arrow::RecordBatch.new(build_schema(type), records)
520
+ end
521
+ end
522
+
523
+ class EachRawRecordTableStructArrayTest < Test::Unit::TestCase
524
+ include EachRawRecordStructArrayTests
525
+
526
+ def build(type, records)
527
+ Arrow::Table.new(build_schema(type), records)
528
+ end
529
+ end
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class EachRawRecordTableTest < Test::Unit::TestCase
19
+ test("2 arrays") do
20
+ raw_record_batches = [
21
+ [
22
+ [true, nil, "Ruby"],
23
+ [nil, 0, "GLib"],
24
+ [false, 2 ** 8 - 1, nil],
25
+ ],
26
+ [
27
+ [nil, 10, "A"],
28
+ [true, 20, "B"],
29
+ [false, nil, "C"],
30
+ [nil, 40, nil],
31
+ ]
32
+ ]
33
+ raw_records = raw_record_batches.inject do |all_records, record_batch|
34
+ all_records + record_batch
35
+ end
36
+ schema = [
37
+ {name: :column0, type: :boolean},
38
+ {name: :column1, type: :uint8},
39
+ {name: :column2, type: :string},
40
+ ]
41
+ record_batches = raw_record_batches.collect do |record_batch|
42
+ Arrow::RecordBatch.new(schema, record_batch)
43
+ end
44
+ table = Arrow::Table.new(schema, record_batches)
45
+ assert_equal(raw_records, table.each_raw_record.to_a)
46
+ end
47
+ end
@@ -157,7 +157,7 @@ module RawRecordsBasicArraysTests
157
157
  assert_equal(records, target.raw_records)
158
158
  end
159
159
 
160
- def test_tring
160
+ def test_string
161
161
  records = [
162
162
  ["Ruby"],
163
163
  [nil],
@@ -36,5 +36,16 @@ class TestExpression < Test::Unit::TestCase
36
36
  assert_equal(Arrow::CallExpression.new("func", ["argument1"]),
37
37
  Arrow::Expression.try_convert(["func", "argument1"]))
38
38
  end
39
+
40
+ test("[Symbol, String, Hash]") do
41
+ options = Arrow::MatchSubstringOptions.new
42
+ options.pattern = "hello"
43
+ assert_equal(Arrow::CallExpression.new("match_substring",
44
+ ["content"],
45
+ options),
46
+ Arrow::Expression.try_convert([:match_substring,
47
+ "content",
48
+ {pattern: "hello"}]))
49
+ end
39
50
  end
40
51
  end
data/test/test-table.rb CHANGED
@@ -589,6 +589,13 @@ class TableTest < Test::Unit::TestCase
589
589
  0 1
590
590
  TABLE
591
591
  end
592
+
593
+ test("empty result") do
594
+ selected_table = @table.filter([false] * @table.size).select_columns(:a)
595
+ assert_equal(<<-TABLE, selected_table.to_s)
596
+ a
597
+ TABLE
598
+ end
592
599
  end
593
600
 
594
601
  sub_test_case("#column_names") do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 12.0.1
4
+ version: 14.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-15 00:00:00.000000000 Z
11
+ date: 2023-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 3.1.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: csv
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: extpp
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -218,6 +232,15 @@ files:
218
232
  - lib/arrow/version.rb
219
233
  - lib/arrow/writable.rb
220
234
  - red-arrow.gemspec
235
+ - test/each-raw-record/test-basic-arrays.rb
236
+ - test/each-raw-record/test-dense-union-array.rb
237
+ - test/each-raw-record/test-dictionary-array.rb
238
+ - test/each-raw-record/test-list-array.rb
239
+ - test/each-raw-record/test-map-array.rb
240
+ - test/each-raw-record/test-multiple-columns.rb
241
+ - test/each-raw-record/test-sparse-union-array.rb
242
+ - test/each-raw-record/test-struct-array.rb
243
+ - test/each-raw-record/test-table.rb
221
244
  - test/fixture/TestOrcFile.test1.orc
222
245
  - test/fixture/float-integer.csv
223
246
  - test/fixture/integer-float.csv
@@ -323,8 +346,8 @@ homepage: https://arrow.apache.org/
323
346
  licenses:
324
347
  - Apache-2.0
325
348
  metadata:
326
- msys2_mingw_dependencies: arrow>=12.0.1
327
- post_install_message:
349
+ msys2_mingw_dependencies: arrow>=14.0.0
350
+ post_install_message:
328
351
  rdoc_options: []
329
352
  require_paths:
330
353
  - lib
@@ -339,11 +362,20 @@ required_rubygems_version: !ruby/object:Gem::Requirement
339
362
  - !ruby/object:Gem::Version
340
363
  version: '0'
341
364
  requirements: []
342
- rubygems_version: 3.3.5
343
- signing_key:
365
+ rubygems_version: 3.5.0.dev
366
+ signing_key:
344
367
  specification_version: 4
345
368
  summary: Red Arrow is the Ruby bindings of Apache Arrow
346
369
  test_files:
370
+ - test/each-raw-record/test-basic-arrays.rb
371
+ - test/each-raw-record/test-dense-union-array.rb
372
+ - test/each-raw-record/test-dictionary-array.rb
373
+ - test/each-raw-record/test-list-array.rb
374
+ - test/each-raw-record/test-map-array.rb
375
+ - test/each-raw-record/test-multiple-columns.rb
376
+ - test/each-raw-record/test-sparse-union-array.rb
377
+ - test/each-raw-record/test-struct-array.rb
378
+ - test/each-raw-record/test-table.rb
347
379
  - test/fixture/TestOrcFile.test1.orc
348
380
  - test/fixture/float-integer.csv
349
381
  - test/fixture/integer-float.csv