red-arrow 18.1.0 → 19.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133)
  1. checksums.yaml +4 -4
  2. data/lib/arrow/array.rb +7 -4
  3. data/lib/arrow/column.rb +4 -4
  4. data/lib/arrow/jruby/array-builder.rb +114 -0
  5. data/lib/arrow/jruby/array.rb +109 -0
  6. data/{test/helper/fixture.rb → lib/arrow/jruby/chunked-array.rb} +14 -6
  7. data/lib/arrow/jruby/compression-type.rb +26 -0
  8. data/{test/test-boolean-scalar.rb → lib/arrow/jruby/csv-read-options.rb} +12 -6
  9. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  10. data/lib/arrow/jruby/decimal128.rb +28 -0
  11. data/lib/arrow/jruby/decimal256.rb +28 -0
  12. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/error.rb} +7 -4
  13. data/lib/arrow/jruby/file-system.rb +24 -0
  14. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  15. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  16. data/{test/fixture/integer-float.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  17. data/{test/fixture/float-integer.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  18. data/lib/arrow/jruby/sort-options.rb +24 -0
  19. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  20. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  21. data/lib/arrow/jruby/writable.rb +24 -0
  22. data/lib/arrow/jruby.rb +52 -0
  23. data/lib/arrow/libraries.rb +126 -0
  24. data/lib/arrow/list-array-builder.rb +1 -0
  25. data/lib/arrow/loader.rb +3 -111
  26. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +6 -4
  27. data/lib/arrow/version.rb +1 -1
  28. data/lib/arrow.rb +2 -7
  29. data/red-arrow.gemspec +15 -6
  30. metadata +26 -229
  31. data/test/each-raw-record/test-basic-arrays.rb +0 -411
  32. data/test/each-raw-record/test-dense-union-array.rb +0 -566
  33. data/test/each-raw-record/test-dictionary-array.rb +0 -341
  34. data/test/each-raw-record/test-list-array.rb +0 -628
  35. data/test/each-raw-record/test-map-array.rb +0 -507
  36. data/test/each-raw-record/test-multiple-columns.rb +0 -72
  37. data/test/each-raw-record/test-sparse-union-array.rb +0 -528
  38. data/test/each-raw-record/test-struct-array.rb +0 -529
  39. data/test/each-raw-record/test-table.rb +0 -47
  40. data/test/fixture/TestOrcFile.test1.orc +0 -0
  41. data/test/fixture/with-header-float.csv +0 -20
  42. data/test/fixture/with-header.csv +0 -20
  43. data/test/fixture/without-header-float.csv +0 -19
  44. data/test/fixture/without-header.csv +0 -19
  45. data/test/helper/omittable.rb +0 -49
  46. data/test/helper.rb +0 -31
  47. data/test/raw-records/test-basic-arrays.rb +0 -405
  48. data/test/raw-records/test-dense-union-array.rb +0 -566
  49. data/test/raw-records/test-dictionary-array.rb +0 -341
  50. data/test/raw-records/test-list-array.rb +0 -628
  51. data/test/raw-records/test-map-array.rb +0 -507
  52. data/test/raw-records/test-multiple-columns.rb +0 -65
  53. data/test/raw-records/test-sparse-union-array.rb +0 -556
  54. data/test/raw-records/test-struct-array.rb +0 -529
  55. data/test/raw-records/test-table.rb +0 -47
  56. data/test/run-test.rb +0 -71
  57. data/test/test-array-builder.rb +0 -198
  58. data/test/test-array.rb +0 -332
  59. data/test/test-bigdecimal.rb +0 -40
  60. data/test/test-binary-dictionary-array-builder.rb +0 -103
  61. data/test/test-buffer.rb +0 -49
  62. data/test/test-chunked-array.rb +0 -198
  63. data/test/test-column.rb +0 -123
  64. data/test/test-csv-loader.rb +0 -297
  65. data/test/test-data-type.rb +0 -84
  66. data/test/test-date32-array.rb +0 -24
  67. data/test/test-date64-array.rb +0 -25
  68. data/test/test-decimal128-array-builder.rb +0 -126
  69. data/test/test-decimal128-array.rb +0 -47
  70. data/test/test-decimal128-data-type.rb +0 -31
  71. data/test/test-decimal128.rb +0 -126
  72. data/test/test-decimal256-array-builder.rb +0 -126
  73. data/test/test-decimal256-array.rb +0 -47
  74. data/test/test-decimal256-data-type.rb +0 -31
  75. data/test/test-decimal256.rb +0 -126
  76. data/test/test-dense-union-array.rb +0 -42
  77. data/test/test-dense-union-data-type.rb +0 -41
  78. data/test/test-dictionary-array.rb +0 -41
  79. data/test/test-dictionary-data-type.rb +0 -40
  80. data/test/test-expression.rb +0 -51
  81. data/test/test-feather.rb +0 -49
  82. data/test/test-field.rb +0 -117
  83. data/test/test-file-output-stream.rb +0 -54
  84. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  85. data/test/test-fixed-size-binary-array.rb +0 -36
  86. data/test/test-float-scalar.rb +0 -46
  87. data/test/test-function.rb +0 -210
  88. data/test/test-group.rb +0 -193
  89. data/test/test-half-float-array.rb +0 -43
  90. data/test/test-half-float.rb +0 -130
  91. data/test/test-list-array-builder.rb +0 -79
  92. data/test/test-list-array.rb +0 -32
  93. data/test/test-list-data-type.rb +0 -69
  94. data/test/test-map-array-builder.rb +0 -110
  95. data/test/test-map-array.rb +0 -33
  96. data/test/test-memory-view.rb +0 -434
  97. data/test/test-orc.rb +0 -173
  98. data/test/test-ractor.rb +0 -34
  99. data/test/test-record-batch-builder.rb +0 -125
  100. data/test/test-record-batch-file-reader.rb +0 -136
  101. data/test/test-record-batch-iterator.rb +0 -37
  102. data/test/test-record-batch-reader.rb +0 -46
  103. data/test/test-record-batch-stream-reader.rb +0 -129
  104. data/test/test-record-batch.rb +0 -182
  105. data/test/test-scalar.rb +0 -65
  106. data/test/test-schema.rb +0 -134
  107. data/test/test-slicer.rb +0 -589
  108. data/test/test-sort-indices.rb +0 -40
  109. data/test/test-sort-key.rb +0 -81
  110. data/test/test-sort-options.rb +0 -58
  111. data/test/test-sparse-union-array.rb +0 -38
  112. data/test/test-sparse-union-data-type.rb +0 -41
  113. data/test/test-stream-listener.rb +0 -60
  114. data/test/test-string-dictionary-array-builder.rb +0 -103
  115. data/test/test-struct-array-builder.rb +0 -184
  116. data/test/test-struct-array.rb +0 -94
  117. data/test/test-struct-data-type.rb +0 -112
  118. data/test/test-table.rb +0 -1530
  119. data/test/test-tensor.rb +0 -297
  120. data/test/test-time.rb +0 -288
  121. data/test/test-time32-array.rb +0 -81
  122. data/test/test-time32-data-type.rb +0 -42
  123. data/test/test-time64-array.rb +0 -81
  124. data/test/test-time64-data-type.rb +0 -42
  125. data/test/test-timestamp-array.rb +0 -45
  126. data/test/test-timestamp-data-type.rb +0 -42
  127. data/test/values/test-basic-arrays.rb +0 -335
  128. data/test/values/test-dense-union-array.rb +0 -552
  129. data/test/values/test-dictionary-array.rb +0 -325
  130. data/test/values/test-list-array.rb +0 -587
  131. data/test/values/test-map-array.rb +0 -489
  132. data/test/values/test-sparse-union-array.rb +0 -543
  133. data/test/values/test-struct-array.rb +0 -524
@@ -1,341 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- module EachRawRecordDictionaryArrayTests
19
- def build_record_batch(array)
20
- dictionary = array.dictionary_encode
21
- schema = Arrow::Schema.new(column: dictionary.value_data_type)
22
- Arrow::RecordBatch.new(schema, array.length, [dictionary])
23
- end
24
-
25
- def test_null
26
- records = [
27
- [nil],
28
- [nil],
29
- [nil],
30
- [nil],
31
- ]
32
- target = build(Arrow::NullArray.new(records.collect(&:first)))
33
- assert_equal(records, target.each_raw_record.to_a)
34
- end
35
-
36
- def test_boolean
37
- records = [
38
- [true],
39
- [nil],
40
- [false],
41
- ]
42
- target = build(Arrow::BooleanArray.new(records.collect(&:first)))
43
- assert_equal(records, target.each_raw_record.to_a)
44
- end
45
-
46
- def test_int8
47
- records = [
48
- [-(2 ** 7)],
49
- [nil],
50
- [(2 ** 7) - 1],
51
- ]
52
- target = build(Arrow::Int8Array.new(records.collect(&:first)))
53
- assert_equal(records, target.each_raw_record.to_a)
54
- end
55
-
56
- def test_uint8
57
- records = [
58
- [0],
59
- [nil],
60
- [(2 ** 8) - 1],
61
- ]
62
- target = build(Arrow::UInt8Array.new(records.collect(&:first)))
63
- assert_equal(records, target.each_raw_record.to_a)
64
- end
65
-
66
- def test_int16
67
- records = [
68
- [-(2 ** 15)],
69
- [nil],
70
- [(2 ** 15) - 1],
71
- ]
72
- target = build(Arrow::Int16Array.new(records.collect(&:first)))
73
- assert_equal(records, target.each_raw_record.to_a)
74
- end
75
-
76
- def test_uint16
77
- records = [
78
- [0],
79
- [nil],
80
- [(2 ** 16) - 1],
81
- ]
82
- target = build(Arrow::UInt16Array.new(records.collect(&:first)))
83
- assert_equal(records, target.each_raw_record.to_a)
84
- end
85
-
86
- def test_int32
87
- records = [
88
- [-(2 ** 31)],
89
- [nil],
90
- [(2 ** 31) - 1],
91
- ]
92
- target = build(Arrow::Int32Array.new(records.collect(&:first)))
93
- assert_equal(records, target.each_raw_record.to_a)
94
- end
95
-
96
- def test_uint32
97
- records = [
98
- [0],
99
- [nil],
100
- [(2 ** 32) - 1],
101
- ]
102
- target = build(Arrow::UInt32Array.new(records.collect(&:first)))
103
- assert_equal(records, target.each_raw_record.to_a)
104
- end
105
-
106
- def test_int64
107
- records = [
108
- [-(2 ** 63)],
109
- [nil],
110
- [(2 ** 63) - 1],
111
- ]
112
- target = build(Arrow::Int64Array.new(records.collect(&:first)))
113
- assert_equal(records, target.each_raw_record.to_a)
114
- end
115
-
116
- def test_uint64
117
- records = [
118
- [0],
119
- [nil],
120
- [(2 ** 64) - 1],
121
- ]
122
- target = build(Arrow::UInt64Array.new(records.collect(&:first)))
123
- assert_equal(records, target.each_raw_record.to_a)
124
- end
125
-
126
- def test_float
127
- records = [
128
- [-1.0],
129
- [nil],
130
- [1.0],
131
- ]
132
- target = build(Arrow::FloatArray.new(records.collect(&:first)))
133
- assert_equal(records, target.each_raw_record.to_a)
134
- end
135
-
136
- def test_double
137
- records = [
138
- [-1.0],
139
- [nil],
140
- [1.0],
141
- ]
142
- target = build(Arrow::DoubleArray.new(records.collect(&:first)))
143
- assert_equal(records, target.each_raw_record.to_a)
144
- end
145
-
146
- def test_binary
147
- records = [
148
- ["\x00".b],
149
- [nil],
150
- ["\xff".b],
151
- ]
152
- target = build(Arrow::BinaryArray.new(records.collect(&:first)))
153
- assert_equal(records, target.each_raw_record.to_a)
154
- end
155
-
156
- def test_string
157
- records = [
158
- ["Ruby"],
159
- [nil],
160
- ["\u3042"], # U+3042 HIRAGANA LETTER A
161
- ]
162
- target = build(Arrow::StringArray.new(records.collect(&:first)))
163
- assert_equal(records, target.each_raw_record.to_a)
164
- end
165
-
166
- def test_date32
167
- records = [
168
- [Date.new(1960, 1, 1)],
169
- [nil],
170
- [Date.new(2017, 8, 23)],
171
- ]
172
- target = build(Arrow::Date32Array.new(records.collect(&:first)))
173
- assert_equal(records, target.each_raw_record.to_a)
174
- end
175
-
176
- def test_date64
177
- records = [
178
- [DateTime.new(1960, 1, 1, 2, 9, 30)],
179
- [nil],
180
- [DateTime.new(2017, 8, 23, 14, 57, 2)],
181
- ]
182
- target = build(Arrow::Date64Array.new(records.collect(&:first)))
183
- assert_equal(records, target.each_raw_record.to_a)
184
- end
185
-
186
- def test_timestamp_second
187
- records = [
188
- [Time.parse("1960-01-01T02:09:30Z")],
189
- [nil],
190
- [Time.parse("2017-08-23T14:57:02Z")],
191
- ]
192
- target = build(Arrow::TimestampArray.new(:second, records.collect(&:first)))
193
- assert_equal(records, target.each_raw_record.to_a)
194
- end
195
-
196
- def test_timestamp_milli
197
- records = [
198
- [Time.parse("1960-01-01T02:09:30.123Z")],
199
- [nil],
200
- [Time.parse("2017-08-23T14:57:02.987Z")],
201
- ]
202
- target = build(Arrow::TimestampArray.new(:milli, records.collect(&:first)))
203
- assert_equal(records, target.each_raw_record.to_a)
204
- end
205
-
206
- def test_timestamp_micro
207
- records = [
208
- [Time.parse("1960-01-01T02:09:30.123456Z")],
209
- [nil],
210
- [Time.parse("2017-08-23T14:57:02.987654Z")],
211
- ]
212
- target = build(Arrow::TimestampArray.new(:micro, records.collect(&:first)))
213
- assert_equal(records, target.each_raw_record.to_a)
214
- end
215
-
216
- def test_timestamp_nano
217
- records = [
218
- [Time.parse("1960-01-01T02:09:30.123456789Z")],
219
- [nil],
220
- [Time.parse("2017-08-23T14:57:02.987654321Z")],
221
- ]
222
- target = build(Arrow::TimestampArray.new(:nano, records.collect(&:first)))
223
- assert_equal(records, target.each_raw_record.to_a)
224
- end
225
-
226
- def test_time32_second
227
- unit = Arrow::TimeUnit::SECOND
228
- records = [
229
- [Arrow::Time.new(unit, 60 * 10)], # 00:10:00
230
- [nil],
231
- [Arrow::Time.new(unit, 60 * 60 * 2 + 9)], # 02:00:09
232
- ]
233
- target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
234
- assert_equal(records, target.each_raw_record.to_a)
235
- end
236
-
237
- def test_time32_milli
238
- unit = Arrow::TimeUnit::MILLI
239
- records = [
240
- [Arrow::Time.new(unit, (60 * 10) * 1000 + 123)], # 00:10:00.123
241
- [nil],
242
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1000 + 987)], # 02:00:09.987
243
- ]
244
- target = build(Arrow::Time32Array.new(unit, records.collect(&:first)))
245
- assert_equal(records, target.each_raw_record.to_a)
246
- end
247
-
248
- def test_time64_micro
249
- unit = Arrow::TimeUnit::MICRO
250
- records = [
251
- # 00:10:00.123456
252
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)],
253
- [nil],
254
- # 02:00:09.987654
255
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000 + 987_654)],
256
- ]
257
- target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
258
- assert_equal(records, target.each_raw_record.to_a)
259
- end
260
-
261
- def test_time64_nano
262
- unit = Arrow::TimeUnit::NANO
263
- records = [
264
- # 00:10:00.123456789
265
- [Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)],
266
- [nil],
267
- # 02:00:09.987654321
268
- [Arrow::Time.new(unit, (60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321)],
269
- ]
270
- target = build(Arrow::Time64Array.new(unit, records.collect(&:first)))
271
- assert_equal(records, target.each_raw_record.to_a)
272
- end
273
-
274
- def test_decimal128
275
- records = [
276
- [BigDecimal("92.92")],
277
- [nil],
278
- [BigDecimal("29.29")],
279
- ]
280
- data_type = Arrow::Decimal128DataType.new(8, 2)
281
- target = build(Arrow::Decimal128Array.new(data_type, records.collect(&:first)))
282
- assert_equal(records, target.each_raw_record.to_a)
283
- end
284
-
285
- def test_decimal256
286
- records = [
287
- [BigDecimal("92.92")],
288
- [nil],
289
- [BigDecimal("29.29")],
290
- ]
291
- data_type = Arrow::Decimal256DataType.new(38, 2)
292
- target = build(Arrow::Decimal256Array.new(data_type, records.collect(&:first)))
293
- assert_equal(records, target.each_raw_record.to_a)
294
- end
295
-
296
- def test_month_interval
297
- records = [
298
- [1],
299
- [nil],
300
- [12],
301
- ]
302
- target = build(Arrow::MonthIntervalArray.new(records.collect(&:first)))
303
- assert_equal(records, target.each_raw_record.to_a)
304
- end
305
-
306
- def test_day_time_interval
307
- records = [
308
- [{day: 1, millisecond: 100}],
309
- [nil],
310
- [{day: 2, millisecond: 300}],
311
- ]
312
- target = build(Arrow::DayTimeIntervalArray.new(records.collect(&:first)))
313
- assert_equal(records, target.each_raw_record.to_a)
314
- end
315
-
316
- def test_month_day_nano_interval
317
- records = [
318
- [{month: 1, day: 1, nanosecond: 100}],
319
- [nil],
320
- [{month: 2, day: 3, nanosecond: 400}],
321
- ]
322
- target = build(Arrow::MonthDayNanoIntervalArray.new(records.collect(&:first)))
323
- assert_equal(records, target.each_raw_record.to_a)
324
- end
325
- end
326
-
327
- class EachRawRecordRecordBatchDictionaryArraysTest < Test::Unit::TestCase
328
- include EachRawRecordDictionaryArrayTests
329
-
330
- def build(array)
331
- build_record_batch(array)
332
- end
333
- end
334
-
335
- class EachRawRecordTableDictionaryArraysTest < Test::Unit::TestCase
336
- include EachRawRecordDictionaryArrayTests
337
-
338
- def build(array)
339
- build_record_batch(array).to_table
340
- end
341
- end