red-arrow 0.14.1 → 0.15.0

Sign up for free protection for your applications and access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
@@ -50,16 +50,16 @@ class StructArrayTest < Test::Unit::TestCase
50
50
 
51
51
  test("#[]") do
52
52
  assert_equal([
53
- Arrow::Struct.new(@array, 0),
54
- Arrow::Struct.new(@array, 1),
53
+ {"visible" => true, "count" => 1},
54
+ {"visible" => false, "count" => 2},
55
55
  ],
56
56
  @array.to_a)
57
57
  end
58
58
 
59
59
  test("#get_value") do
60
60
  assert_equal([
61
- Arrow::Struct.new(@array, 0),
62
- Arrow::Struct.new(@array, 1),
61
+ {"visible" => true, "count" => 1},
62
+ {"visible" => false, "count" => 2},
63
63
  ],
64
64
  [
65
65
  @array.get_value(0),
@@ -37,14 +37,15 @@ class TableTest < Test::Unit::TestCase
37
37
  ]
38
38
  @count_array = Arrow::ChunkedArray.new(count_arrays)
39
39
  @visible_array = Arrow::ChunkedArray.new(visible_arrays)
40
- @count_column = Arrow::Column.new(@count_field, @count_array)
41
- @visible_column = Arrow::Column.new(@visible_field, @visible_array)
42
- @table = Arrow::Table.new(schema, [@count_column, @visible_column])
40
+ @table = Arrow::Table.new(schema, [@count_array, @visible_array])
43
41
  end
44
42
 
45
43
  test("#columns") do
46
- assert_equal(["count", "visible"],
47
- @table.columns.collect(&:name))
44
+ assert_equal([
45
+ Arrow::Column.new(@table, 0),
46
+ Arrow::Column.new(@table, 1),
47
+ ],
48
+ @table.columns)
48
49
  end
49
50
 
50
51
  sub_test_case("#slice") do
@@ -188,11 +189,18 @@ class TableTest < Test::Unit::TestCase
188
189
 
189
190
  sub_test_case("#[]") do
190
191
  test("[String]") do
191
- assert_equal(@count_column, @table["count"])
192
+ assert_equal(Arrow::Column.new(@table, 0),
193
+ @table["count"])
192
194
  end
193
195
 
194
196
  test("[Symbol]") do
195
- assert_equal(@visible_column, @table[:visible])
197
+ assert_equal(Arrow::Column.new(@table, 1),
198
+ @table[:visible])
199
+ end
200
+
201
+ test("[Integer]") do
202
+ assert_equal(Arrow::Column.new(@table, 1),
203
+ @table[-1])
196
204
  end
197
205
  end
198
206
 
@@ -279,7 +287,8 @@ class TableTest < Test::Unit::TestCase
279
287
  end
280
288
 
281
289
  test("column name getter") do
282
- assert_equal(@visible_column, @table.visible)
290
+ assert_equal(Arrow::Column.new(@table, 1),
291
+ @table.visible)
283
292
  end
284
293
 
285
294
  sub_test_case("#remove_column") do
@@ -0,0 +1,288 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class TimeTest < Test::Unit::TestCase
19
+ sub_test_case("#==") do
20
+ test("same unit") do
21
+ assert do
22
+ Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 10)
23
+ end
24
+ end
25
+
26
+ test("different unit") do
27
+ assert do
28
+ Arrow::Time.new(:second, 10) == Arrow::Time.new(:milli, 10 * 1000)
29
+ end
30
+ end
31
+
32
+ test("false") do
33
+ assert do
34
+ not(Arrow::Time.new(:second, 10) == Arrow::Time.new(:second, 11))
35
+ end
36
+ end
37
+ end
38
+
39
+ sub_test_case("#cast") do
40
+ test("same unit") do
41
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
42
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
43
+ assert_equal([time.unit, time.value],
44
+ [casted_time.unit, casted_time.value])
45
+ end
46
+
47
+ test("second -> milli") do
48
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
49
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
50
+ assert_equal([
51
+ Arrow::TimeUnit::MILLI,
52
+ time.value * 1000,
53
+ ],
54
+ [
55
+ casted_time.unit,
56
+ casted_time.value,
57
+ ])
58
+ end
59
+
60
+ test("second -> micro") do
61
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
62
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
63
+ assert_equal([
64
+ Arrow::TimeUnit::MICRO,
65
+ time.value * 1000 * 1000,
66
+ ],
67
+ [
68
+ casted_time.unit,
69
+ casted_time.value,
70
+ ])
71
+ end
72
+
73
+ test("second -> nano") do
74
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
75
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
76
+ assert_equal([
77
+ Arrow::TimeUnit::NANO,
78
+ time.value * 1000 * 1000 * 1000,
79
+ ],
80
+ [
81
+ casted_time.unit,
82
+ casted_time.value,
83
+ ])
84
+ end
85
+
86
+ test("milli -> second") do
87
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
88
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
89
+ assert_equal([
90
+ Arrow::TimeUnit::SECOND,
91
+ 10,
92
+ ],
93
+ [
94
+ casted_time.unit,
95
+ casted_time.value,
96
+ ])
97
+ end
98
+
99
+ test("milli -> micro") do
100
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
101
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
102
+ assert_equal([
103
+ Arrow::TimeUnit::MICRO,
104
+ time.value * 1000,
105
+ ],
106
+ [
107
+ casted_time.unit,
108
+ casted_time.value,
109
+ ])
110
+ end
111
+
112
+ test("milli -> nano") do
113
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
114
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
115
+ assert_equal([
116
+ Arrow::TimeUnit::NANO,
117
+ time.value * 1000 * 1000,
118
+ ],
119
+ [
120
+ casted_time.unit,
121
+ casted_time.value,
122
+ ])
123
+ end
124
+
125
+ test("micro -> second") do
126
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
127
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
128
+ assert_equal([
129
+ Arrow::TimeUnit::SECOND,
130
+ 10,
131
+ ],
132
+ [
133
+ casted_time.unit,
134
+ casted_time.value,
135
+ ])
136
+ end
137
+
138
+ test("micro -> milli") do
139
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
140
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
141
+ assert_equal([
142
+ Arrow::TimeUnit::MILLI,
143
+ 10_200,
144
+ ],
145
+ [
146
+ casted_time.unit,
147
+ casted_time.value,
148
+ ])
149
+ end
150
+
151
+ test("micro -> nano") do
152
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
153
+ casted_time = time.cast(Arrow::TimeUnit::NANO)
154
+ assert_equal([
155
+ Arrow::TimeUnit::NANO,
156
+ time.value * 1000,
157
+ ],
158
+ [
159
+ casted_time.unit,
160
+ casted_time.value,
161
+ ])
162
+ end
163
+
164
+ test("nano -> second") do
165
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
166
+ casted_time = time.cast(Arrow::TimeUnit::SECOND)
167
+ assert_equal([
168
+ Arrow::TimeUnit::SECOND,
169
+ 10,
170
+ ],
171
+ [
172
+ casted_time.unit,
173
+ casted_time.value,
174
+ ])
175
+ end
176
+
177
+ test("nano -> milli") do
178
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
179
+ casted_time = time.cast(Arrow::TimeUnit::MILLI)
180
+ assert_equal([
181
+ Arrow::TimeUnit::MILLI,
182
+ 10_200,
183
+ ],
184
+ [
185
+ casted_time.unit,
186
+ casted_time.value,
187
+ ])
188
+ end
189
+
190
+ test("nano -> micro") do
191
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
192
+ casted_time = time.cast(Arrow::TimeUnit::MICRO)
193
+ assert_equal([
194
+ Arrow::TimeUnit::MICRO,
195
+ 10_200_300,
196
+ ],
197
+ [
198
+ casted_time.unit,
199
+ casted_time.value,
200
+ ])
201
+ end
202
+ end
203
+
204
+ sub_test_case("#to_f") do
205
+ test("second") do
206
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
207
+ assert_in_delta(10.0, time.to_f)
208
+ end
209
+
210
+ test("milli") do
211
+ time = Arrow::Time.new(Arrow::TimeUnit::MILLI, 10_200)
212
+ assert_in_delta(10.2, time.to_f)
213
+ end
214
+
215
+ test("micro") do
216
+ time = Arrow::Time.new(Arrow::TimeUnit::MICRO, 10_200_300)
217
+ assert_in_delta(10.2003, time.to_f)
218
+ end
219
+
220
+ test("nano") do
221
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 10_200_300_400)
222
+ assert_in_delta(10.2003004, time.to_f)
223
+ end
224
+ end
225
+
226
+ sub_test_case("#positive?") do
227
+ test("true") do
228
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
229
+ assert do
230
+ time.positive?
231
+ end
232
+ end
233
+
234
+ test("false") do
235
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
236
+ assert do
237
+ not time.positive?
238
+ end
239
+ end
240
+ end
241
+
242
+ sub_test_case("#negative?") do
243
+ test("true") do
244
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, -10)
245
+ assert do
246
+ time.negative?
247
+ end
248
+ end
249
+
250
+ test("false") do
251
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND, 10)
252
+ assert do
253
+ not time.negative?
254
+ end
255
+ end
256
+ end
257
+
258
+ test("#hour") do
259
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
260
+ (5 * 60 * 60) + (12 * 60) + 10)
261
+ assert_equal(5, time.hour)
262
+ end
263
+
264
+ test("#minute") do
265
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
266
+ (5 * 60 * 60) + (12 * 60) + 10)
267
+ assert_equal(12, time.minute)
268
+ end
269
+
270
+ test("#second") do
271
+ time = Arrow::Time.new(Arrow::TimeUnit::SECOND,
272
+ (5 * 60 * 60) + (12 * 60) + 10)
273
+ assert_equal(10, time.second)
274
+ end
275
+
276
+ test("#nano_second") do
277
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO, 1234)
278
+ assert_equal(1234, time.nano_second)
279
+ end
280
+
281
+ test("#to_s") do
282
+ time = Arrow::Time.new(Arrow::TimeUnit::NANO,
283
+ -(((5 * 60 * 60) + (12 * 60) + 10) * 1_000_000_000 +
284
+ 1234))
285
+ assert_equal("-05:12:10.000001234",
286
+ time.to_s)
287
+ end
288
+ end
@@ -0,0 +1,81 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Time32ArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ sub_test_case("unit") do
21
+ test("Arrow::TimeUnit") do
22
+ values = [1000 * 10, nil]
23
+ array = Arrow::Time32Array.new(Arrow::TimeUnit::MILLI, values)
24
+ assert_equal([
25
+ "time32[ms]",
26
+ [
27
+ Arrow::Time.new(Arrow::TimeUnit::MILLI,
28
+ 1000 * 10),
29
+ nil,
30
+ ],
31
+ ],
32
+ [
33
+ array.value_data_type.to_s,
34
+ array.to_a,
35
+ ])
36
+ end
37
+
38
+ test("Symbol") do
39
+ values = [60 * 10, nil]
40
+ array = Arrow::Time32Array.new(:second, values)
41
+ assert_equal([
42
+ "time32[s]",
43
+ [
44
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
45
+ 60 * 10),
46
+ nil,
47
+ ],
48
+ ],
49
+ [
50
+ array.value_data_type.to_s,
51
+ array.to_a,
52
+ ])
53
+ end
54
+ end
55
+
56
+ sub_test_case("values") do
57
+ test("Arrow::Time") do
58
+ data_type = Arrow::Time32DataType.new(:second)
59
+ values = [
60
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
61
+ 60 * 10),
62
+ nil,
63
+ ]
64
+ array = Arrow::Time32Array.new(data_type, values)
65
+ assert_equal(values, array.to_a)
66
+ end
67
+
68
+ test("Integer") do
69
+ data_type = Arrow::Time32DataType.new(:second)
70
+ values = [60 * 10, nil]
71
+ array = Arrow::Time32Array.new(data_type, values)
72
+ assert_equal([
73
+ Arrow::Time.new(Arrow::TimeUnit::SECOND,
74
+ 60 * 10),
75
+ nil,
76
+ ],
77
+ array.to_a)
78
+ end
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,81 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class Time64ArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ sub_test_case("unit") do
21
+ test("Arrow::TimeUnit") do
22
+ values = [1000 * 10, nil]
23
+ array = Arrow::Time64Array.new(Arrow::TimeUnit::NANO, values)
24
+ assert_equal([
25
+ "time64[ns]",
26
+ [
27
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
28
+ 1000 * 10),
29
+ nil,
30
+ ],
31
+ ],
32
+ [
33
+ array.value_data_type.to_s,
34
+ array.to_a,
35
+ ])
36
+ end
37
+
38
+ test("Symbol") do
39
+ values = [1000 * 10, nil]
40
+ array = Arrow::Time64Array.new(:micro, values)
41
+ assert_equal([
42
+ "time64[us]",
43
+ [
44
+ Arrow::Time.new(Arrow::TimeUnit::MICRO,
45
+ 1000 * 10),
46
+ nil,
47
+ ],
48
+ ],
49
+ [
50
+ array.value_data_type.to_s,
51
+ array.to_a,
52
+ ])
53
+ end
54
+ end
55
+
56
+ sub_test_case("values") do
57
+ test("Arrow::Time") do
58
+ data_type = Arrow::Time64DataType.new(:nano)
59
+ values = [
60
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
61
+ 1000 * 10),
62
+ nil,
63
+ ]
64
+ array = Arrow::Time64Array.new(data_type, values)
65
+ assert_equal(values, array.to_a)
66
+ end
67
+
68
+ test("Integer") do
69
+ data_type = Arrow::Time64DataType.new(:nano)
70
+ values = [1000 * 10, nil]
71
+ array = Arrow::Time64Array.new(data_type, values)
72
+ assert_equal([
73
+ Arrow::Time.new(Arrow::TimeUnit::NANO,
74
+ 1000 * 10),
75
+ nil,
76
+ ],
77
+ array.to_a)
78
+ end
79
+ end
80
+ end
81
+ end