red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -1,434 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class MemoryViewTest < Test::Unit::TestCase
19
- def setup
20
- unless Fiddle.const_defined?(:MemoryView)
21
- omit("Fiddle::MemoryView is needed")
22
- end
23
- unless Fiddle::MemoryView.respond_to?(:export)
24
- omit("Fiddle::MemoryView.export is needed")
25
- end
26
- end
27
-
28
- def little_endian?
29
- [1].pack("s") == [1].pack("s<")
30
- end
31
-
32
- test("BooleanArray") do
33
- array = Arrow::BooleanArray.new([true] * 9)
34
- Fiddle::MemoryView.export(array) do |memory_view|
35
- if little_endian?
36
- template = "b"
37
- else
38
- template = "B"
39
- end
40
- assert_equal([
41
- "#{template}8",
42
- 1,
43
- 2,
44
- [(("1" * 9) + ("0" * 7))].pack("#{template}*"),
45
- ],
46
- [
47
- memory_view.format,
48
- memory_view.item_size,
49
- memory_view.byte_size,
50
- memory_view.to_s,
51
- ])
52
- end
53
- end
54
-
55
- test("Int8Array") do
56
- values = [-(2 ** 7), 0, (2 ** 7) - 1]
57
- array = Arrow::Int8Array.new(values)
58
- Fiddle::MemoryView.export(array) do |memory_view|
59
- assert_equal([
60
- "c",
61
- 1,
62
- values.size,
63
- values.pack("c*"),
64
- ],
65
- [
66
- memory_view.format,
67
- memory_view.item_size,
68
- memory_view.byte_size,
69
- memory_view.to_s,
70
- ])
71
- end
72
- end
73
-
74
- test("Int16Array") do
75
- values = [-(2 ** 15), 0, (2 ** 15) - 1]
76
- array = Arrow::Int16Array.new(values)
77
- Fiddle::MemoryView.export(array) do |memory_view|
78
- assert_equal([
79
- "s",
80
- 2,
81
- 2 * values.size,
82
- values.pack("s*"),
83
- ],
84
- [
85
- memory_view.format,
86
- memory_view.item_size,
87
- memory_view.byte_size,
88
- memory_view.to_s,
89
- ])
90
- end
91
- end
92
-
93
- test("Int32Array") do
94
- values = [-(2 ** 31), 0, (2 ** 31) - 1]
95
- array = Arrow::Int32Array.new(values)
96
- Fiddle::MemoryView.export(array) do |memory_view|
97
- assert_equal([
98
- "l",
99
- 4,
100
- 4 * values.size,
101
- values.pack("l*"),
102
- ],
103
- [
104
- memory_view.format,
105
- memory_view.item_size,
106
- memory_view.byte_size,
107
- memory_view.to_s,
108
- ])
109
- end
110
- end
111
-
112
- test("Int64Array") do
113
- values = [-(2 ** 63), 0, (2 ** 63) - 1]
114
- array = Arrow::Int64Array.new(values)
115
- Fiddle::MemoryView.export(array) do |memory_view|
116
- assert_equal([
117
- "q",
118
- 8,
119
- 8 * values.size,
120
- values.pack("q*"),
121
- ],
122
- [
123
- memory_view.format,
124
- memory_view.item_size,
125
- memory_view.byte_size,
126
- memory_view.to_s,
127
- ])
128
- end
129
- end
130
-
131
- test("UInt8Array") do
132
- values = [0, (2 ** 8) - 1]
133
- array = Arrow::UInt8Array.new(values)
134
- Fiddle::MemoryView.export(array) do |memory_view|
135
- assert_equal([
136
- "C",
137
- 1,
138
- values.size,
139
- values.pack("C*"),
140
- ],
141
- [
142
- memory_view.format,
143
- memory_view.item_size,
144
- memory_view.byte_size,
145
- memory_view.to_s,
146
- ])
147
- end
148
- end
149
-
150
- test("UInt16Array") do
151
- values = [0, (2 ** 16) - 1]
152
- array = Arrow::UInt16Array.new(values)
153
- Fiddle::MemoryView.export(array) do |memory_view|
154
- assert_equal([
155
- "S",
156
- 2,
157
- 2 * values.size,
158
- values.pack("S*"),
159
- ],
160
- [
161
- memory_view.format,
162
- memory_view.item_size,
163
- memory_view.byte_size,
164
- memory_view.to_s,
165
- ])
166
- end
167
- end
168
-
169
- test("UInt32Array") do
170
- values = [0, (2 ** 32) - 1]
171
- array = Arrow::UInt32Array.new(values)
172
- Fiddle::MemoryView.export(array) do |memory_view|
173
- assert_equal([
174
- "L",
175
- 4,
176
- 4 * values.size,
177
- values.pack("L*"),
178
- ],
179
- [
180
- memory_view.format,
181
- memory_view.item_size,
182
- memory_view.byte_size,
183
- memory_view.to_s,
184
- ])
185
- end
186
- end
187
-
188
- test("UInt64Array") do
189
- values = [(2 ** 64) - 1]
190
- array = Arrow::UInt64Array.new(values)
191
- Fiddle::MemoryView.export(array) do |memory_view|
192
- assert_equal([
193
- "Q",
194
- 8,
195
- 8 * values.size,
196
- values.pack("Q*"),
197
- ],
198
- [
199
- memory_view.format,
200
- memory_view.item_size,
201
- memory_view.byte_size,
202
- memory_view.to_s,
203
- ])
204
- end
205
- end
206
-
207
- test("FloatArray") do
208
- values = [-1.1, 0.0, 1.1]
209
- array = Arrow::FloatArray.new(values)
210
- Fiddle::MemoryView.export(array) do |memory_view|
211
- assert_equal([
212
- "f",
213
- 4,
214
- 4 * values.size,
215
- values.pack("f*"),
216
- ],
217
- [
218
- memory_view.format,
219
- memory_view.item_size,
220
- memory_view.byte_size,
221
- memory_view.to_s,
222
- ])
223
- end
224
- end
225
-
226
- test("DoubleArray") do
227
- values = [-1.1, 0.0, 1.1]
228
- array = Arrow::DoubleArray.new(values)
229
- Fiddle::MemoryView.export(array) do |memory_view|
230
- assert_equal([
231
- "d",
232
- 8,
233
- 8 * values.size,
234
- values.pack("d*"),
235
- ],
236
- [
237
- memory_view.format,
238
- memory_view.item_size,
239
- memory_view.byte_size,
240
- memory_view.to_s,
241
- ])
242
- end
243
- end
244
-
245
- test("FixedSizeBinaryArray") do
246
- values = ["\x01\x02", "\x03\x04", "\x05\x06"]
247
- data_type = Arrow::FixedSizeBinaryDataType.new(2)
248
- array = Arrow::FixedSizeBinaryArray.new(data_type, values)
249
- Fiddle::MemoryView.export(array) do |memory_view|
250
- assert_equal([
251
- "C2",
252
- 2,
253
- 2 * values.size,
254
- values.join("").b,
255
- ],
256
- [
257
- memory_view.format,
258
- memory_view.item_size,
259
- memory_view.byte_size,
260
- memory_view.to_s,
261
- ])
262
- end
263
- end
264
-
265
- test("Date32Array") do
266
- n_days_since_epoch = 17406 # 2017-08-28
267
- values = [n_days_since_epoch]
268
- array = Arrow::Date32Array.new(values)
269
- Fiddle::MemoryView.export(array) do |memory_view|
270
- assert_equal([
271
- "l",
272
- 4,
273
- 4 * values.size,
274
- values.pack("l*"),
275
- ],
276
- [
277
- memory_view.format,
278
- memory_view.item_size,
279
- memory_view.byte_size,
280
- memory_view.to_s,
281
- ])
282
- end
283
- end
284
-
285
- test("Date64Array") do
286
- n_msecs_since_epoch = 1503878400000 # 2017-08-28T00:00:00Z
287
- values = [n_msecs_since_epoch]
288
- array = Arrow::Date64Array.new(values)
289
- Fiddle::MemoryView.export(array) do |memory_view|
290
- assert_equal([
291
- "q",
292
- 8,
293
- 8 * values.size,
294
- values.pack("q*"),
295
- ],
296
- [
297
- memory_view.format,
298
- memory_view.item_size,
299
- memory_view.byte_size,
300
- memory_view.to_s,
301
- ])
302
- end
303
- end
304
-
305
- test("Time32Array") do
306
- values = [1, 2, 3]
307
- array = Arrow::Time32Array.new(:milli, values)
308
- Fiddle::MemoryView.export(array) do |memory_view|
309
- assert_equal([
310
- "l",
311
- 4,
312
- 4 * values.size,
313
- values.pack("l*"),
314
- ],
315
- [
316
- memory_view.format,
317
- memory_view.item_size,
318
- memory_view.byte_size,
319
- memory_view.to_s,
320
- ])
321
- end
322
- end
323
-
324
- test("Time64Array") do
325
- values = [1, 2, 3]
326
- array = Arrow::Time64Array.new(:nano, values)
327
- Fiddle::MemoryView.export(array) do |memory_view|
328
- assert_equal([
329
- "q",
330
- 8,
331
- 8 * values.size,
332
- values.pack("q*"),
333
- ],
334
- [
335
- memory_view.format,
336
- memory_view.item_size,
337
- memory_view.byte_size,
338
- memory_view.to_s,
339
- ])
340
- end
341
- end
342
-
343
- test("TimestampArray") do
344
- values = [1, 2, 3]
345
- array = Arrow::TimestampArray.new(:micro, values)
346
- Fiddle::MemoryView.export(array) do |memory_view|
347
- assert_equal([
348
- "q",
349
- 8,
350
- 8 * values.size,
351
- values.pack("q*"),
352
- ],
353
- [
354
- memory_view.format,
355
- memory_view.item_size,
356
- memory_view.byte_size,
357
- memory_view.to_s,
358
- ])
359
- end
360
- end
361
-
362
- test("Decimal128Array") do
363
- values = [
364
- Arrow::Decimal128.new("10.1"),
365
- Arrow::Decimal128.new("11.1"),
366
- Arrow::Decimal128.new("10.2"),
367
- ]
368
- data_type = Arrow::Decimal128DataType.new(3, 1)
369
- array = Arrow::Decimal128Array.new(data_type, values)
370
- Fiddle::MemoryView.export(array) do |memory_view|
371
- assert_equal([
372
- "q2",
373
- 16,
374
- 16 * values.size,
375
- values.collect {|value| value.to_bytes.to_s}.join(""),
376
- ],
377
- [
378
- memory_view.format,
379
- memory_view.item_size,
380
- memory_view.byte_size,
381
- memory_view.to_s,
382
- ])
383
- end
384
- end
385
-
386
- test("Decimal256Array") do
387
- values = [
388
- Arrow::Decimal256.new("10.1"),
389
- Arrow::Decimal256.new("11.1"),
390
- Arrow::Decimal256.new("10.2"),
391
- ]
392
- data_type = Arrow::Decimal256DataType.new(3, 1)
393
- array = Arrow::Decimal256Array.new(data_type, values)
394
- Fiddle::MemoryView.export(array) do |memory_view|
395
- assert_equal([
396
- "q4",
397
- 32,
398
- 32 * values.size,
399
- values.collect {|value| value.to_bytes.to_s}.join(""),
400
- ],
401
- [
402
- memory_view.format,
403
- memory_view.item_size,
404
- memory_view.byte_size,
405
- memory_view.to_s,
406
- ])
407
- end
408
- end
409
-
410
- test("Buffer") do
411
- values = [0, nil, nil] * 3
412
- array = Arrow::Int8Array.new(values)
413
- buffer = array.null_bitmap
414
- Fiddle::MemoryView.export(buffer) do |memory_view|
415
- if little_endian?
416
- template = "b"
417
- else
418
- template = "B"
419
- end
420
- assert_equal([
421
- "#{template}8",
422
- 1,
423
- 2,
424
- ["100" * 3].pack("#{template}*"),
425
- ],
426
- [
427
- memory_view.format,
428
- memory_view.item_size,
429
- memory_view.byte_size,
430
- memory_view.to_s,
431
- ])
432
- end
433
- end
434
- end
data/test/test-orc.rb DELETED
@@ -1,173 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class ORCTest < Test::Unit::TestCase
19
- include Helper::Fixture
20
-
21
- def setup
22
- omit("Require Apache Arrow ORC") unless Arrow.const_defined?(:ORCFileReader)
23
- @orc_path = fixture_path("TestOrcFile.test1.orc")
24
- end
25
-
26
- def pp_values(values)
27
- "[\n " + values.collect(&:inspect).join(",\n ") + "\n]"
28
- end
29
-
30
- sub_test_case("load") do
31
- test("default") do
32
- table = Arrow::Table.load(@orc_path)
33
- dump = table.columns.collect do |column|
34
- [
35
- column.field.to_s,
36
- column.data.chunks.collect(&:to_s),
37
- ]
38
- end
39
- assert_equal([
40
- ["boolean1: bool", [pp_values([false, true])]],
41
- ["byte1: int8", [pp_values([1, 100])]],
42
- ["short1: int16", [pp_values([1024, 2048])]],
43
- ["int1: int32", [pp_values([65536, 65536])]],
44
- [
45
- "long1: int64",
46
- [pp_values([9223372036854775807, 9223372036854775807])],
47
- ],
48
- ["float1: float", [pp_values([1, 2])]],
49
- ["double1: double", [pp_values([-15, -5])]],
50
- ["bytes1: binary", ["[\n 0001020304,\n \n]"]],
51
- ["string1: string", [pp_values(["hi", "bye"])]],
52
- [
53
- "middle: " +
54
- "struct<list: " +
55
- "list<item: struct<int1: int32, string1: string>>>",
56
- [
57
- <<-STRUCT.chomp
58
- -- is_valid: all not null
59
- -- child 0 type: list<item: struct<int1: int32, string1: string>>
60
- [
61
- -- is_valid: all not null
62
- -- child 0 type: int32
63
- [
64
- 1,
65
- 2
66
- ]
67
- -- child 1 type: string
68
- [
69
- "bye",
70
- "sigh"
71
- ],
72
- -- is_valid: all not null
73
- -- child 0 type: int32
74
- [
75
- 1,
76
- 2
77
- ]
78
- -- child 1 type: string
79
- [
80
- "bye",
81
- "sigh"
82
- ]
83
- ]
84
- STRUCT
85
- ]
86
- ],
87
- [
88
- "list: list<item: struct<int1: int32, string1: string>>",
89
- [
90
- <<-LIST.chomp
91
- [
92
- -- is_valid: all not null
93
- -- child 0 type: int32
94
- [
95
- 3,
96
- 4
97
- ]
98
- -- child 1 type: string
99
- [
100
- "good",
101
- "bad"
102
- ],
103
- -- is_valid: all not null
104
- -- child 0 type: int32
105
- [
106
- 100000000,
107
- -100000,
108
- 1234
109
- ]
110
- -- child 1 type: string
111
- [
112
- "cat",
113
- "in",
114
- "hat"
115
- ]
116
- ]
117
- LIST
118
- ]
119
- ],
120
- [
121
- "map: map<string, struct<int1: int32, string1: string>>",
122
- [
123
- <<-MAP.chomp
124
- [
125
- keys:
126
- []
127
- values:
128
- -- is_valid: all not null
129
- -- child 0 type: int32
130
- []
131
- -- child 1 type: string
132
- [],
133
- keys:
134
- [
135
- "chani",
136
- "mauddib"
137
- ]
138
- values:
139
- -- is_valid: all not null
140
- -- child 0 type: int32
141
- [
142
- 5,
143
- 1
144
- ]
145
- -- child 1 type: string
146
- [
147
- "chani",
148
- "mauddib"
149
- ]
150
- ]
151
- MAP
152
- ],
153
- ],
154
- ],
155
- dump)
156
- end
157
-
158
- test(":field_indexes") do
159
- table = Arrow::Table.load(@orc_path, field_indexes: [1, 3])
160
- dump = table.columns.collect do |column|
161
- [
162
- column.field.to_s,
163
- column.data.chunks.collect(&:to_s),
164
- ]
165
- end
166
- assert_equal([
167
- ["boolean1: bool", [pp_values([false, true])]],
168
- ["short1: int16", [pp_values([1024, 2048])]],
169
- ],
170
- dump)
171
- end
172
- end
173
- end