red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
data/test/raw-records/record-batch/test-struct-array.rb +426 -0
@@ -0,0 +1,426 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class RawRecordsRecordBatchStructArrayTest < Test::Unit::TestCase
19
+ def fields(type)
20
+ field_description = {
21
+ name: :field,
22
+ }
23
+ if type.is_a?(Hash)
24
+ field_description = field_description.merge(type)
25
+ else
26
+ field_description[:type] = type
27
+ end
28
+ {
29
+ column: {
30
+ type: :struct,
31
+ fields: [
32
+ field_description,
33
+ ],
34
+ },
35
+ }
36
+ end
37
+
38
+ test("NullArray") do
39
+ records = [
40
+ [{"field" => nil}],
41
+ [nil],
42
+ ]
43
+ record_batch = Arrow::RecordBatch.new(fields(:null),
44
+ records)
45
+ assert_equal(records, record_batch.raw_records)
46
+ end
47
+
48
+ test("BooleanArray") do
49
+ records = [
50
+ [{"field" => true}],
51
+ [nil],
52
+ [{"field" => nil}],
53
+ ]
54
+ record_batch = Arrow::RecordBatch.new(fields(:boolean),
55
+ records)
56
+ assert_equal(records, record_batch.raw_records)
57
+ end
58
+
59
+ test("Int8Array") do
60
+ records = [
61
+ [{"field" => -(2 ** 7)}],
62
+ [nil],
63
+ [{"field" => nil}],
64
+ ]
65
+ record_batch = Arrow::RecordBatch.new(fields(:int8),
66
+ records)
67
+ assert_equal(records, record_batch.raw_records)
68
+ end
69
+
70
+ test("UInt8Array") do
71
+ records = [
72
+ [{"field" => (2 ** 8) - 1}],
73
+ [nil],
74
+ [{"field" => nil}],
75
+ ]
76
+ record_batch = Arrow::RecordBatch.new(fields(:uint8),
77
+ records)
78
+ assert_equal(records, record_batch.raw_records)
79
+ end
80
+
81
+ test("Int16Array") do
82
+ records = [
83
+ [{"field" => -(2 ** 15)}],
84
+ [nil],
85
+ [{"field" => nil}],
86
+ ]
87
+ record_batch = Arrow::RecordBatch.new(fields(:int16),
88
+ records)
89
+ assert_equal(records, record_batch.raw_records)
90
+ end
91
+
92
+ test("UInt16Array") do
93
+ records = [
94
+ [{"field" => (2 ** 16) - 1}],
95
+ [nil],
96
+ [{"field" => nil}],
97
+ ]
98
+ record_batch = Arrow::RecordBatch.new(fields(:uint16),
99
+ records)
100
+ assert_equal(records, record_batch.raw_records)
101
+ end
102
+
103
+ test("Int32Array") do
104
+ records = [
105
+ [{"field" => -(2 ** 31)}],
106
+ [nil],
107
+ [{"field" => nil}],
108
+ ]
109
+ record_batch = Arrow::RecordBatch.new(fields(:int32),
110
+ records)
111
+ assert_equal(records, record_batch.raw_records)
112
+ end
113
+
114
+ test("UInt32Array") do
115
+ records = [
116
+ [{"field" => (2 ** 32) - 1}],
117
+ [nil],
118
+ [{"field" => nil}],
119
+ ]
120
+ record_batch = Arrow::RecordBatch.new(fields(:uint32),
121
+ records)
122
+ assert_equal(records, record_batch.raw_records)
123
+ end
124
+
125
+ test("Int64Array") do
126
+ records = [
127
+ [{"field" => -(2 ** 63)}],
128
+ [nil],
129
+ [{"field" => nil}],
130
+ ]
131
+ record_batch = Arrow::RecordBatch.new(fields(:int64),
132
+ records)
133
+ assert_equal(records, record_batch.raw_records)
134
+ end
135
+
136
+ test("UInt64Array") do
137
+ records = [
138
+ [{"field" => (2 ** 64) - 1}],
139
+ [nil],
140
+ [{"field" => nil}],
141
+ ]
142
+ record_batch = Arrow::RecordBatch.new(fields(:uint64),
143
+ records)
144
+ assert_equal(records, record_batch.raw_records)
145
+ end
146
+
147
+ test("FloatArray") do
148
+ records = [
149
+ [{"field" => -1.0}],
150
+ [nil],
151
+ [{"field" => nil}],
152
+ ]
153
+ record_batch = Arrow::RecordBatch.new(fields(:float),
154
+ records)
155
+ assert_equal(records, record_batch.raw_records)
156
+ end
157
+
158
+ test("DoubleArray") do
159
+ records = [
160
+ [{"field" => -1.0}],
161
+ [nil],
162
+ [{"field" => nil}],
163
+ ]
164
+ record_batch = Arrow::RecordBatch.new(fields(:double),
165
+ records)
166
+ assert_equal(records, record_batch.raw_records)
167
+ end
168
+
169
+ test("BinaryArray") do
170
+ records = [
171
+ [{"field" => "\xff".b}],
172
+ [nil],
173
+ [{"field" => nil}],
174
+ ]
175
+ record_batch = Arrow::RecordBatch.new(fields(:binary),
176
+ records)
177
+ assert_equal(records, record_batch.raw_records)
178
+ end
179
+
180
+ test("StringArray") do
181
+ records = [
182
+ [{"field" => "Ruby"}],
183
+ [nil],
184
+ [{"field" => nil}],
185
+ ]
186
+ record_batch = Arrow::RecordBatch.new(fields(:string),
187
+ records)
188
+ assert_equal(records, record_batch.raw_records)
189
+ end
190
+
191
+ test("Date32Array") do
192
+ records = [
193
+ [{"field" => Date.new(1960, 1, 1)}],
194
+ [nil],
195
+ [{"field" => nil}],
196
+ ]
197
+ record_batch = Arrow::RecordBatch.new(fields(:date32),
198
+ records)
199
+ assert_equal(records, record_batch.raw_records)
200
+ end
201
+
202
+ test("Date64Array") do
203
+ records = [
204
+ [{"field" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
205
+ [nil],
206
+ [{"field" => nil}],
207
+ ]
208
+ record_batch = Arrow::RecordBatch.new(fields(:date64),
209
+ records)
210
+ assert_equal(records, record_batch.raw_records)
211
+ end
212
+
213
+ sub_test_case("TimestampArray") do
214
+ test("second") do
215
+ records = [
216
+ [{"field" => Time.parse("1960-01-01T02:09:30Z")}],
217
+ [nil],
218
+ [{"field" => nil}],
219
+ ]
220
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
221
+ unit: :second),
222
+ records)
223
+ assert_equal(records, record_batch.raw_records)
224
+ end
225
+
226
+ test("milli") do
227
+ records = [
228
+ [{"field" => Time.parse("1960-01-01T02:09:30.123Z")}],
229
+ [nil],
230
+ [{"field" => nil}],
231
+ ]
232
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
233
+ unit: :milli),
234
+ records)
235
+ assert_equal(records, record_batch.raw_records)
236
+ end
237
+
238
+ test("micro") do
239
+ records = [
240
+ [{"field" => Time.parse("1960-01-01T02:09:30.123456Z")}],
241
+ [nil],
242
+ [{"field" => nil}],
243
+ ]
244
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
245
+ unit: :micro),
246
+ records)
247
+ assert_equal(records, record_batch.raw_records)
248
+ end
249
+
250
+ test("nano") do
251
+ records = [
252
+ [{"field" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
253
+ [nil],
254
+ [{"field" => nil}],
255
+ ]
256
+ record_batch = Arrow::RecordBatch.new(fields(type: :timestamp,
257
+ unit: :nano),
258
+ records)
259
+ assert_equal(records, record_batch.raw_records)
260
+ end
261
+ end
262
+
263
+ sub_test_case("Time32Array") do
264
+ test("second") do
265
+ records = [
266
+ [{"field" => 60 * 10}], # 00:10:00
267
+ [nil],
268
+ [{"field" => nil}],
269
+ ]
270
+ record_batch = Arrow::RecordBatch.new(fields(type: :time32,
271
+ unit: :second),
272
+ records)
273
+ assert_equal(records, record_batch.raw_records)
274
+ end
275
+
276
+ test("milli") do
277
+ records = [
278
+ [{"field" => (60 * 10) * 1000 + 123}], # 00:10:00.123
279
+ [nil],
280
+ [{"field" => nil}],
281
+ ]
282
+ record_batch = Arrow::RecordBatch.new(fields(type: :time32,
283
+ unit: :milli),
284
+ records)
285
+ assert_equal(records, record_batch.raw_records)
286
+ end
287
+ end
288
+
289
+ sub_test_case("Time64Array") do
290
+ test("micro") do
291
+ records = [
292
+ [{"field" => (60 * 10) * 1_000_000 + 123_456}], # 00:10:00.123456
293
+ [nil],
294
+ [{"field" => nil}],
295
+ ]
296
+ record_batch = Arrow::RecordBatch.new(fields(type: :time64,
297
+ unit: :micro),
298
+ records)
299
+ assert_equal(records, record_batch.raw_records)
300
+ end
301
+
302
+ test("nano") do
303
+ records = [
304
+ # 00:10:00.123456789
305
+ [{"field" => (60 * 10) * 1_000_000_000 + 123_456_789}],
306
+ [nil],
307
+ [{"field" => nil}],
308
+ ]
309
+ record_batch = Arrow::RecordBatch.new(fields(type: :time64,
310
+ unit: :nano),
311
+ records)
312
+ assert_equal(records, record_batch.raw_records)
313
+ end
314
+ end
315
+
316
+ test("Decimal128Array") do
317
+ records = [
318
+ [{"field" => BigDecimal("92.92")}],
319
+ [nil],
320
+ [{"field" => nil}],
321
+ ]
322
+ record_batch = Arrow::RecordBatch.new(fields(type: :decimal128,
323
+ precision: 8,
324
+ scale: 2),
325
+ records)
326
+ assert_equal(records, record_batch.raw_records)
327
+ end
328
+
329
+ test("ListArray") do
330
+ records = [
331
+ [{"field" => [true, nil, false]}],
332
+ [nil],
333
+ [{"field" => nil}],
334
+ ]
335
+ record_batch = Arrow::RecordBatch.new(fields(type: :list,
336
+ field: {
337
+ name: :sub_element,
338
+ type: :boolean,
339
+ }),
340
+ records)
341
+ assert_equal(records, record_batch.raw_records)
342
+ end
343
+
344
+ test("StructArray") do
345
+ records = [
346
+ [{"field" => {"sub_field" => true}}],
347
+ [nil],
348
+ [{"field" => nil}],
349
+ [{"field" => {"sub_field" => nil}}],
350
+ ]
351
+ record_batch = Arrow::RecordBatch.new(fields(type: :struct,
352
+ fields: [
353
+ {
354
+ name: :sub_field,
355
+ type: :boolean,
356
+ },
357
+ ]),
358
+ records)
359
+ assert_equal(records, record_batch.raw_records)
360
+ end
361
+
362
+ test("SparseUnionArray") do
363
+ omit("Need to add support for SparseUnionArrayBuilder")
364
+ records = [
365
+ [{"field" => {"field1" => true}}],
366
+ [nil],
367
+ [{"field" => nil}],
368
+ [{"field" => {"field2" => nil}}],
369
+ ]
370
+ record_batch = Arrow::RecordBatch.new(fields(type: :sparse_union,
371
+ fields: [
372
+ {
373
+ name: :field1,
374
+ type: :boolean,
375
+ },
376
+ {
377
+ name: :field2,
378
+ type: :uint8,
379
+ },
380
+ ],
381
+ type_codes: [0, 1]),
382
+ records)
383
+ assert_equal(records, record_batch.raw_records)
384
+ end
385
+
386
+ test("DenseUnionArray") do
387
+ omit("Need to add support for DenseUnionArrayBuilder")
388
+ records = [
389
+ [{"field" => {"field1" => true}}],
390
+ [nil],
391
+ [{"field" => nil}],
392
+ [{"field" => {"field2" => nil}}],
393
+ ]
394
+ record_batch = Arrow::RecordBatch.new(fields(type: :dense_union,
395
+ fields: [
396
+ {
397
+ name: :field1,
398
+ type: :boolean,
399
+ },
400
+ {
401
+ name: :field2,
402
+ type: :uint8,
403
+ },
404
+ ],
405
+ type_codes: [0, 1]),
406
+ records)
407
+ assert_equal(records, record_batch.raw_records)
408
+ end
409
+
410
+ test("DictionaryArray") do
411
+ omit("Need to add support for DictionaryArrayBuilder")
412
+ records = [
413
+ [{"field" => "Ruby"}],
414
+ [nil],
415
+ [{"field" => nil}],
416
+ [{"field" => "GLib"}],
417
+ ]
418
+ dictionary = Arrow::StringArray.new(["GLib", "Ruby"])
419
+ record_batch = Arrow::RecordBatch.new(fields(type: :dictionary,
420
+ index_data_type: :int8,
421
+ dictionary: dictionary,
422
+ ordered: true),
423
+ records)
424
+ assert_equal(records, record_batch.raw_records)
425
+ end
426
+ end
data/test/run-test.rb +25 -2
@@ -17,17 +17,40 @@
17
17
  # specific language governing permissions and limitations
18
18
  # under the License.
19
19
 
20
- ENV["TZ"] = "Asia/Tokyo"
21
-
22
20
  $VERBOSE = true
23
21
 
24
22
  require "pathname"
25
23
 
24
+ (ENV["ARROW_DLL_PATH"] || "").split(File::PATH_SEPARATOR).each do |path|
25
+ RubyInstaller::Runtime.add_dll_directory(path)
26
+ end
27
+
26
28
  base_dir = Pathname.new(__dir__).parent.expand_path
27
29
 
28
30
  lib_dir = base_dir + "lib"
31
+ ext_dir = base_dir + "ext" + "arrow"
29
32
  test_dir = base_dir + "test"
30
33
 
34
+ make = nil
35
+ if ENV["NO_MAKE"] != "yes"
36
+ if ENV["MAKE"]
37
+ make = ENV["MAKE"]
38
+ elsif system("which gmake > #{File::NULL} 2>&1")
39
+ make = "gmake"
40
+ elsif system("which make > #{File::NULL} 2>&1")
41
+ make = "make"
42
+ end
43
+ end
44
+ if make
45
+ Dir.chdir(ext_dir.to_s) do
46
+ unless File.exist?("Makefile")
47
+ system(RbConfig.ruby, "extconf.rb", "--enable-debug-build") or exit(false)
48
+ end
49
+ system("#{make} > #{File::NULL}") or exit(false)
50
+ end
51
+ end
52
+
53
+ $LOAD_PATH.unshift(ext_dir.to_s)
31
54
  $LOAD_PATH.unshift(lib_dir.to_s)
32
55
 
33
56
  require_relative "helper"