red-arrow 0.14.1 → 0.15.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -50,16 +50,16 @@ class StructArrayTest < Test::Unit::TestCase
50
50
 
51
51
  test("#[]") do
52
52
  assert_equal([
53
- Arrow::Struct.new(@array, 0),
54
- Arrow::Struct.new(@array, 1),
53
+ {"visible" => true, "count" => 1},
54
+ {"visible" => false, "count" => 2},
55
55
  ],
56
56
  @array.to_a)
57
57
  end
58
58
 
59
59
  test("#get_value") do
60
60
  assert_equal([
61
- Arrow::Struct.new(@array, 0),
62
- Arrow::Struct.new(@array, 1),
61
+ {"visible" => true, "count" => 1},
62
+ {"visible" => false, "count" => 2},
63
63
  ],
64
64
  [
65
65
  @array.get_value(0),
@@ -37,14 +37,15 @@ class TableTest < Test::Unit::TestCase
37
37
  ]
38
38
  @count_array = Arrow::ChunkedArray.new(count_arrays)
39
39
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
40
- @count_column = Arrow::Column.new(@count_field, @count_array)
41
- @visible_column = Arrow::Column.new(@visible_field, @visible_array)
42
- @table = Arrow::Table.new(schema, [@count_column, @visible_column])
40
+ @table = Arrow::Table.new(schema, [@count_array, @visible_array])
43
41
  end
44
42
 
45
43
  test("#columns") do
46
- assert_equal(["count", "visible"],
47
- @table.columns.collect(&:name))
44
+ assert_equal([
45
+ Arrow::Column.new(@table, 0),
46
+ Arrow::Column.new(@table, 1),
47
+ ],
48
+ @table.columns)
48
49
  end
49
50
 
50
51
  sub_test_case("#slice") do
@@ -188,11 +189,18 @@ class TableTest < Test::Unit::TestCase
188
189
 
189
190
  sub_test_case("#[]") do
190
191
  test("[String]") do
191
- assert_equal(@count_column, @table["count"])
192
+ assert_equal(Arrow::Column.new(@table, 0),
193
+ @table["count"])
192
194
  end
193
195
 
194
196
  test("[Symbol]") do
195
- assert_equal(@visible_column, @table[:visible])
197
+ assert_equal(Arrow::Column.new(@table, 1),
198
+ @table[:visible])
199
+ end
200
+
201
+ test("[Integer]") do
202
+ assert_equal(Arrow::Column.new(@table, 1),
203
+ @table[-1])
196
204
  end
197
205
  end
198
206
 
@@ -279,7 +287,8 @@ class TableTest < Test::Unit::TestCase
279
287
  end
280
288
 
281
289
  test("column name getter") do
282
- assert_equal(@visible_column, @table.visible)
290
+ assert_equal(Arrow::Column.new(@table, 1),
291
+ @table.visible)
283
292
  end
284
293
 
285
294
  sub_test_case("#remove_column") do
@@ -0,0 +1,288 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TimeTest < Test::Unit::TestCase
19
+ sub_test_case("#==") do
20
+ test("same unit") do
21
+ assert do
22
+ Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 10)
23
+ end
24
+ end
25
+
26
+ test("different unit") do
27
+ assert do
28
+ Arrow::Time.new(:second, 10) == Arrow::Time.new(:milli, 10 * 1000)
29
+ end
30
+ end
31
+
32
+ test("false") do
33
+ assert do
34
+ not(Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 11))
35
+ end
36
+ end
37
+ end
38
+
39
+ sub_test_case("#cast") do
40
+ test("same unit") do
41
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
42
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
43
+ assert_equal([time.unit, time.value],
44
+ [casted_time.unit, casted_time.value])
45
+ end
46
+
47
+ test("second -> milli") do
48
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
49
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
50
+ assert_equal([
51
+ Arrow::TimeUnit::MILLI,
52
+ time.value * 1000,
53
+ ],
54
+ [
55
+ casted_time.unit,
56
+ casted_time.value,
57
+ ])
58
+ end
59
+
60
+ test("second -> micro") do
61
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
62
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
63
+ assert_equal([
64
+ Arrow::TimeUnit::MICRO,
65
+ time.value * 1000 * 1000,
66
+ ],
67
+ [
68
+ casted_time.unit,
69
+ casted_time.value,
70
+ ])
71
+ end
72
+
73
+ test("second -> nano") do
74
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
75
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
76
+ assert_equal([
77
+ Arrow::TimeUnit::NANO,
78
+ time.value * 1000 * 1000 * 1000,
79
+ ],
80
+ [
81
+ casted_time.unit,
82
+ casted_time.value,
83
+ ])
84
+ end
85
+
86
+ test("milli -> second") do
87
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
88
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
89
+ assert_equal([
90
+ Arrow::TimeUnit::SECOND,
91
+ 10,
92
+ ],
93
+ [
94
+ casted_time.unit,
95
+ casted_time.value,
96
+ ])
97
+ end
98
+
99
+ test("milli -> micro") do
100
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
101
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
102
+ assert_equal([
103
+ Arrow::TimeUnit::MICRO,
104
+ time.value * 1000,
105
+ ],
106
+ [
107
+ casted_time.unit,
108
+ casted_time.value,
109
+ ])
110
+ end
111
+
112
+ test("milli -> nano") do
113
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
114
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
115
+ assert_equal([
116
+ Arrow::TimeUnit::NANO,
117
+ time.value * 1000 * 1000,
118
+ ],
119
+ [
120
+ casted_time.unit,
121
+ casted_time.value,
122
+ ])
123
+ end
124
+
125
+ test("micro -> second") do
126
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
127
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
128
+ assert_equal([
129
+ Arrow::TimeUnit::SECOND,
130
+ 10,
131
+ ],
132
+ [
133
+ casted_time.unit,
134
+ casted_time.value,
135
+ ])
136
+ end
137
+
138
+ test("micro -> milli") do
139
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
140
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
141
+ assert_equal([
142
+ Arrow::TimeUnit::MILLI,
143
+ 10_200,
144
+ ],
145
+ [
146
+ casted_time.unit,
147
+ casted_time.value,
148
+ ])
149
+ end
150
+
151
+ test("micro -> nano") do
152
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
153
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
154
+ assert_equal([
155
+ Arrow::TimeUnit::NANO,
156
+ time.value * 1000,
157
+ ],
158
+ [
159
+ casted_time.unit,
160
+ casted_time.value,
161
+ ])
162
+ end
163
+
164
+ test("nano -> second") do
165
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
166
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
167
+ assert_equal([
168
+ Arrow::TimeUnit::SECOND,
169
+ 10,
170
+ ],
171
+ [
172
+ casted_time.unit,
173
+ casted_time.value,
174
+ ])
175
+ end
176
+
177
+ test("nano -> milli") do
178
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
179
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
180
+ assert_equal([
181
+ Arrow::TimeUnit::MILLI,
182
+ 10_200,
183
+ ],
184
+ [
185
+ casted_time.unit,
186
+ casted_time.value,
187
+ ])
188
+ end
189
+
190
+ test("nano -> micro") do
191
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
192
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
193
+ assert_equal([
194
+ Arrow::TimeUnit::MICRO,
195
+ 10_200_300,
196
+ ],
197
+ [
198
+ casted_time.unit,
199
+ casted_time.value,
200
+ ])
201
+ end
202
+ end
203
+
204
+ sub_test_case("#to_f") do
205
+ test("second") do
206
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
207
+ assert_in_delta(10.0, time.to_f)
208
+ end
209
+
210
+ test("milli") do
211
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
212
+ assert_in_delta(10.2, time.to_f)
213
+ end
214
+
215
+ test("micro") do
216
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
217
+ assert_in_delta(10.2003, time.to_f)
218
+ end
219
+
220
+ test("nano") do
221
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
222
+ assert_in_delta(10.2003004, time.to_f)
223
+ end
224
+ end
225
+
226
+ sub_test_case("#positive?") do
227
+ test("true") do
228
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
229
+ assert do
230
+ time.positive?
231
+ end
232
+ end
233
+
234
+ test("false") do
235
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
236
+ assert do
237
+ not time.positive?
238
+ end
239
+ end
240
+ end
241
+
242
+ sub_test_case("#negative?") do
243
+ test("true") do
244
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
245
+ assert do
246
+ time.negative?
247
+ end
248
+ end
249
+
250
+ test("false") do
251
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
252
+ assert do
253
+ not time.negative?
254
+ end
255
+ end
256
+ end
257
+
258
+ test("#hour") do
259
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
260
+ (5 * 60 * 60) + (12 * 60) + 10)
261
+ assert_equal(5, time.hour)
262
+ end
263
+
264
+ test("#minute") do
265
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
266
+ (5 * 60 * 60) + (12 * 60) + 10)
267
+ assert_equal(12, time.minute)
268
+ end
269
+
270
+ test("#second") do
271
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
272
+ (5 * 60 * 60) + (12 * 60) + 10)
273
+ assert_equal(10, time.second)
274
+ end
275
+
276
+ test("#nano_second") do
277
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 1234)
278
+ assert_equal(1234, time.nano_second)
279
+ end
280
+
281
+ test("#to_s") do
282
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO,
283
+ -(((5 * 60 * 60) + (12 * 60) + 10) * 1_000_000_000 +
284
+ 1234))
285
+ assert_equal("-05:12:10.000001234",
286
+ time.to_s)
287
+ end
288
+ end
@@ -0,0 +1,81 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Time32ArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ sub_test_case("unit") do
21
+ test("Arrow::TimeUnit") do
22
+ values = [1000 * 10, nil]
23
+ array = Arrow::Time32Array.new(Arrow::TimeUnit::MILLI, values)
24
+ assert_equal([
25
+ "time32[ms]",
26
+ [
27
+ Arrow::Time.new(Arrow::TimeUnit::MILLI,
28
+ 1000 * 10),
29
+ nil,
30
+ ],
31
+ ],
32
+ [
33
+ array.value_data_type.to_s,
34
+ array.to_a,
35
+ ])
36
+ end
37
+
38
+ test("Symbol") do
39
+ values = [60 * 10, nil]
40
+ array = Arrow::Time32Array.new(:second, values)
41
+ assert_equal([
42
+ "time32[s]",
43
+ [
44
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
45
+ 60 * 10),
46
+ nil,
47
+ ],
48
+ ],
49
+ [
50
+ array.value_data_type.to_s,
51
+ array.to_a,
52
+ ])
53
+ end
54
+ end
55
+
56
+ sub_test_case("values") do
57
+ test("Arrow::Time") do
58
+ data_type = Arrow::Time32DataType.new(:second)
59
+ values = [
60
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
61
+ 60 * 10),
62
+ nil,
63
+ ]
64
+ array = Arrow::Time32Array.new(data_type, values)
65
+ assert_equal(values, array.to_a)
66
+ end
67
+
68
+ test("Integer") do
69
+ data_type = Arrow::Time32DataType.new(:second)
70
+ values = [60 * 10, nil]
71
+ array = Arrow::Time32Array.new(data_type, values)
72
+ assert_equal([
73
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
74
+ 60 * 10),
75
+ nil,
76
+ ],
77
+ array.to_a)
78
+ end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,81 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Time64ArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ sub_test_case("unit") do
21
+ test("Arrow::TimeUnit") do
22
+ values = [1000 * 10, nil]
23
+ array = Arrow::Time64Array.new(Arrow::TimeUnit::NANO, values)
24
+ assert_equal([
25
+ "time64[ns]",
26
+ [
27
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
28
+ 1000 * 10),
29
+ nil,
30
+ ],
31
+ ],
32
+ [
33
+ array.value_data_type.to_s,
34
+ array.to_a,
35
+ ])
36
+ end
37
+
38
+ test("Symbol") do
39
+ values = [1000 * 10, nil]
40
+ array = Arrow::Time64Array.new(:micro, values)
41
+ assert_equal([
42
+ "time64[us]",
43
+ [
44
+ Arrow::Time.new(Arrow::TimeUnit::MICRO,
45
+ 1000 * 10),
46
+ nil,
47
+ ],
48
+ ],
49
+ [
50
+ array.value_data_type.to_s,
51
+ array.to_a,
52
+ ])
53
+ end
54
+ end
55
+
56
+ sub_test_case("values") do
57
+ test("Arrow::Time") do
58
+ data_type = Arrow::Time64DataType.new(:nano)
59
+ values = [
60
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
61
+ 1000 * 10),
62
+ nil,
63
+ ]
64
+ array = Arrow::Time64Array.new(data_type, values)
65
+ assert_equal(values, array.to_a)
66
+ end
67
+
68
+ test("Integer") do
69
+ data_type = Arrow::Time64DataType.new(:nano)
70
+ values = [1000 * 10, nil]
71
+ array = Arrow::Time64Array.new(data_type, values)
72
+ assert_equal([
73
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
74
+ 1000 * 10),
75
+ nil,
76
+ ],
77
+ array.to_a)
78
+ end
79
+ end
80
+ end
81
+ end