red-arrow 0.13.0 → 0.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

@@ -19,21 +19,21 @@ class DictionaryDataTypeTest < Test::Unit::TestCase
19
19
  sub_test_case(".new") do
20
20
  def setup
21
21
  @index_data_type = :int8
22
- @dictionary = Arrow::StringArray.new(["Hello", "World"])
22
+ @value_data_type = :string
23
23
  @ordered = true
24
24
  end
25
25
 
26
26
  test("ordered arguments") do
27
27
  assert_equal("dictionary<values=string, indices=int8, ordered=1>",
28
28
  Arrow::DictionaryDataType.new(@index_data_type,
29
- @dictionary,
29
+ @value_data_type,
30
30
  @ordered).to_s)
31
31
  end
32
32
 
33
33
  test("description") do
34
34
  assert_equal("dictionary<values=string, indices=int8, ordered=1>",
35
35
  Arrow::DictionaryDataType.new(index_data_type: @index_data_type,
36
- dictionary: @dictionary,
36
+ value_data_type: @value_data_type,
37
37
  ordered: @ordered).to_s)
38
38
  end
39
39
  end
@@ -74,17 +74,24 @@ class TableTest < Test::Unit::TestCase
74
74
  end
75
75
 
76
76
  test("Integer: positive") do
77
- assert_equal(<<-TABLE, @table.slice(2).to_s)
78
- count visible
79
- 0 4
80
- TABLE
77
+ assert_equal({"count" => 128, "visible" => nil},
78
+ @table.slice(@table.n_rows - 1).to_h)
81
79
  end
82
80
 
83
81
  test("Integer: negative") do
84
- assert_equal(<<-TABLE, @table.slice(-1).to_s)
85
- count visible
86
- 0 128
87
- TABLE
82
+ assert_equal({"count" => 1, "visible" => true},
83
+ @table.slice(-@table.n_rows).to_h)
84
+ end
85
+
86
+ test("Integer: out of index") do
87
+ assert_equal([
88
+ nil,
89
+ nil,
90
+ ],
91
+ [
92
+ @table.slice(@table.n_rows),
93
+ @table.slice(-(@table.n_rows + 1)),
94
+ ])
88
95
  end
89
96
 
90
97
  test("Range: positive: include end") do
@@ -145,17 +152,35 @@ class TableTest < Test::Unit::TestCase
145
152
  end
146
153
  end
147
154
 
148
- test("too many arguments: with block") do
155
+ test("too many arguments") do
149
156
  message = "wrong number of arguments (given 3, expected 1..2)"
150
157
  assert_raise(ArgumentError.new(message)) do
151
158
  @table.slice(1, 2, 3)
152
159
  end
153
160
  end
154
161
 
155
- test("too many arguments: without block") do
156
- message = "wrong number of arguments (given 3, expected 0..2)"
162
+ test("arguments: with block") do
163
+ message = "must not specify both arguments and block"
164
+ assert_raise(ArgumentError.new(message)) do
165
+ @table.slice(1, 2) {}
166
+ end
167
+ end
168
+
169
+ test("offset: too small") do
170
+ n_rows = @table.n_rows
171
+ offset = -(n_rows + 1)
172
+ message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
173
+ assert_raise(ArgumentError.new(message)) do
174
+ @table.slice(offset, 1)
175
+ end
176
+ end
177
+
178
+ test("offset: too large") do
179
+ n_rows = @table.n_rows
180
+ offset = n_rows
181
+ message = "offset is out of range (-#{n_rows + 1},#{n_rows}): #{offset}"
157
182
  assert_raise(ArgumentError.new(message)) do
158
- @table.slice(1, 2, 3) {}
183
+ @table.slice(offset, 1)
159
184
  end
160
185
  end
161
186
  end
@@ -492,7 +517,8 @@ class TableTest < Test::Unit::TestCase
492
517
 
493
518
  test("csv.gz") do
494
519
  file = Tempfile.new(["red-arrow", ".csv.gz"])
495
- Zlib::GzipWriter.wrap(file) do |gz|
520
+ file.close
521
+ Zlib::GzipWriter.open(file.path) do |gz|
496
522
  gz.write(<<-CSV)
497
523
  name,score
498
524
  alice,10
@@ -505,7 +531,7 @@ chris,-1
505
531
  0 alice 10
506
532
  1 bob 29
507
533
  2 chris -1
508
- TABLE
534
+ TABLE
509
535
  end
510
536
  end
511
537
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-31 00:00:00.000000000 Z
11
+ date: 2019-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: extpp
@@ -181,7 +181,7 @@ files:
181
181
  - doc/text/development.md
182
182
  - ext/arrow/arrow.cpp
183
183
  - ext/arrow/extconf.rb
184
- - ext/arrow/record-batch.cpp
184
+ - ext/arrow/raw-records.cpp
185
185
  - ext/arrow/red-arrow.hpp
186
186
  - image/red-arrow.png
187
187
  - lib/arrow.rb
@@ -254,12 +254,13 @@ files:
254
254
  - test/fixture/without-header.csv
255
255
  - test/helper.rb
256
256
  - test/helper/fixture.rb
257
- - test/raw-records/record-batch/test-basic-arrays.rb
258
- - test/raw-records/record-batch/test-dense-union-array.rb
259
- - test/raw-records/record-batch/test-list-array.rb
260
- - test/raw-records/record-batch/test-multiple-columns.rb
261
- - test/raw-records/record-batch/test-sparse-union-array.rb
262
- - test/raw-records/record-batch/test-struct-array.rb
257
+ - test/raw-records/test-basic-arrays.rb
258
+ - test/raw-records/test-dense-union-array.rb
259
+ - test/raw-records/test-list-array.rb
260
+ - test/raw-records/test-multiple-columns.rb
261
+ - test/raw-records/test-sparse-union-array.rb
262
+ - test/raw-records/test-struct-array.rb
263
+ - test/raw-records/test-table.rb
263
264
  - test/run-test.rb
264
265
  - test/test-array-builder.rb
265
266
  - test/test-array.rb
@@ -328,62 +329,63 @@ signing_key:
328
329
  specification_version: 4
329
330
  summary: Red Arrow is the Ruby bindings of Apache Arrow
330
331
  test_files:
331
- - test/test-record-batch-file-reader.rb
332
- - test/test-tensor.rb
333
- - test/test-dense-union-data-type.rb
334
332
  - test/test-time64-data-type.rb
335
- - test/fixture/with-header.csv
336
- - test/fixture/float-integer.csv
337
- - test/fixture/null-without-double-quote.csv
338
- - test/fixture/without-header-float.csv
339
- - test/fixture/with-header-float.csv
340
- - test/fixture/without-header.csv
341
- - test/fixture/integer-float.csv
342
- - test/fixture/TestOrcFile.test1.orc
343
- - test/fixture/null-with-double-quote.csv
344
- - test/test-file-output-stream.rb
345
- - test/test-bigdecimal.rb
346
- - test/test-record-batch-builder.rb
347
- - test/test-struct.rb
348
- - test/test-column.rb
349
- - test/test-time32-data-type.rb
333
+ - test/test-feather.rb
334
+ - test/test-decimal128.rb
350
335
  - test/test-struct-array.rb
351
- - test/test-array-builder.rb
352
- - test/test-chunked-array.rb
353
- - test/test-list-array.rb
354
- - test/test-date32-array.rb
355
- - test/test-field.rb
356
- - test/test-struct-array-builder.rb
357
- - test/test-decimal128-array-builder.rb
358
- - test/test-record-batch.rb
336
+ - test/test-data-type.rb
337
+ - test/test-list-data-type.rb
338
+ - test/test-dense-union-data-type.rb
359
339
  - test/helper.rb
360
- - test/test-orc.rb
361
- - test/test-struct-data-type.rb
362
- - test/test-rolling-window.rb
340
+ - test/test-record-batch.rb
341
+ - test/test-table.rb
363
342
  - test/run-test.rb
364
- - test/test-timestamp-data-type.rb
365
- - test/test-feather.rb
366
- - test/test-decimal128-data-type.rb
367
- - test/test-list-data-type.rb
368
343
  - test/test-timestamp-array.rb
344
+ - test/test-date32-array.rb
345
+ - test/test-array-builder.rb
346
+ - test/test-date64-array.rb
347
+ - test/test-record-batch-file-reader.rb
348
+ - test/test-decimal128-data-type.rb
349
+ - test/test-timestamp-data-type.rb
350
+ - test/test-column.rb
351
+ - test/test-field.rb
369
352
  - test/test-decimal128-array.rb
370
- - test/test-table.rb
371
353
  - test/test-csv-loader.rb
354
+ - test/test-bigdecimal.rb
355
+ - test/test-list-array.rb
356
+ - test/test-rolling-window.rb
357
+ - test/test-dictionary-data-type.rb
358
+ - test/fixture/integer-float.csv
359
+ - test/fixture/without-header-float.csv
360
+ - test/fixture/null-with-double-quote.csv
361
+ - test/fixture/with-header-float.csv
362
+ - test/fixture/TestOrcFile.test1.orc
363
+ - test/fixture/null-without-double-quote.csv
364
+ - test/fixture/without-header.csv
365
+ - test/fixture/with-header.csv
366
+ - test/fixture/float-integer.csv
367
+ - test/test-orc.rb
368
+ - test/test-time32-data-type.rb
369
+ - test/test-struct-data-type.rb
370
+ - test/test-struct.rb
371
+ - test/test-group.rb
372
+ - test/test-buffer.rb
373
+ - test/test-slicer.rb
372
374
  - test/test-list-array-builder.rb
375
+ - test/test-sparse-union-data-type.rb
376
+ - test/test-record-batch-builder.rb
377
+ - test/raw-records/test-struct-array.rb
378
+ - test/raw-records/test-table.rb
379
+ - test/raw-records/test-basic-arrays.rb
380
+ - test/raw-records/test-list-array.rb
381
+ - test/raw-records/test-dense-union-array.rb
382
+ - test/raw-records/test-sparse-union-array.rb
383
+ - test/raw-records/test-multiple-columns.rb
373
384
  - test/test-array.rb
374
- - test/test-group.rb
385
+ - test/test-file-output-stream.rb
386
+ - test/test-tensor.rb
387
+ - test/test-decimal128-array-builder.rb
388
+ - test/test-chunked-array.rb
375
389
  - test/test-schema.rb
376
- - test/test-dictionary-data-type.rb
377
- - test/test-data-type.rb
378
- - test/raw-records/record-batch/test-sparse-union-array.rb
379
- - test/raw-records/record-batch/test-basic-arrays.rb
380
- - test/raw-records/record-batch/test-multiple-columns.rb
381
- - test/raw-records/record-batch/test-struct-array.rb
382
- - test/raw-records/record-batch/test-list-array.rb
383
- - test/raw-records/record-batch/test-dense-union-array.rb
384
- - test/test-decimal128.rb
390
+ - test/test-struct-array-builder.rb
385
391
  - test/helper/fixture.rb
386
- - test/test-date64-array.rb
387
- - test/test-buffer.rb
388
- - test/test-sparse-union-data-type.rb
389
- - test/test-slicer.rb
@@ -1,349 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- class RawRecordsRecordBatchBasicArraysTest < Test::Unit::TestCase
19
- test("NullArray") do
20
- records = [
21
- [nil],
22
- [nil],
23
- [nil],
24
- [nil],
25
- ]
26
- array = Arrow::NullArray.new(records.size)
27
- schema = Arrow::Schema.new(column: :null)
28
- record_batch = Arrow::RecordBatch.new(schema,
29
- records.size,
30
- [array])
31
- assert_equal(records, record_batch.raw_records)
32
- end
33
-
34
- test("BooleanArray") do
35
- records = [
36
- [true],
37
- [nil],
38
- [false],
39
- ]
40
- record_batch = Arrow::RecordBatch.new({column: :boolean},
41
- records)
42
- assert_equal(records, record_batch.raw_records)
43
- end
44
-
45
- test("Int8Array") do
46
- records = [
47
- [-(2 ** 7)],
48
- [nil],
49
- [(2 ** 7) - 1],
50
- ]
51
- record_batch = Arrow::RecordBatch.new({column: :int8},
52
- records)
53
- assert_equal(records, record_batch.raw_records)
54
- end
55
-
56
- test("UInt8Array") do
57
- records = [
58
- [0],
59
- [nil],
60
- [(2 ** 8) - 1],
61
- ]
62
- record_batch = Arrow::RecordBatch.new({column: :uint8},
63
- records)
64
- assert_equal(records, record_batch.raw_records)
65
- end
66
-
67
- test("Int16Array") do
68
- records = [
69
- [-(2 ** 15)],
70
- [nil],
71
- [(2 ** 15) - 1],
72
- ]
73
- record_batch = Arrow::RecordBatch.new({column: :int16},
74
- records)
75
- assert_equal(records, record_batch.raw_records)
76
- end
77
-
78
- test("UInt16Array") do
79
- records = [
80
- [0],
81
- [nil],
82
- [(2 ** 16) - 1],
83
- ]
84
- record_batch = Arrow::RecordBatch.new({column: :uint16},
85
- records)
86
- assert_equal(records, record_batch.raw_records)
87
- end
88
-
89
- test("Int32Array") do
90
- records = [
91
- [-(2 ** 31)],
92
- [nil],
93
- [(2 ** 31) - 1],
94
- ]
95
- record_batch = Arrow::RecordBatch.new({column: :int32},
96
- records)
97
- assert_equal(records, record_batch.raw_records)
98
- end
99
-
100
- test("UInt32Array") do
101
- records = [
102
- [0],
103
- [nil],
104
- [(2 ** 32) - 1],
105
- ]
106
- record_batch = Arrow::RecordBatch.new({column: :uint32},
107
- records)
108
- assert_equal(records, record_batch.raw_records)
109
- end
110
-
111
- test("Int64Array") do
112
- records = [
113
- [-(2 ** 63)],
114
- [nil],
115
- [(2 ** 63) - 1],
116
- ]
117
- record_batch = Arrow::RecordBatch.new({column: :int64},
118
- records)
119
- assert_equal(records, record_batch.raw_records)
120
- end
121
-
122
- test("UInt64Array") do
123
- records = [
124
- [0],
125
- [nil],
126
- [(2 ** 64) - 1],
127
- ]
128
- record_batch = Arrow::RecordBatch.new({column: :uint64},
129
- records)
130
- assert_equal(records, record_batch.raw_records)
131
- end
132
-
133
- test("FloatArray") do
134
- records = [
135
- [-1.0],
136
- [nil],
137
- [1.0],
138
- ]
139
- record_batch = Arrow::RecordBatch.new({column: :float},
140
- records)
141
- assert_equal(records, record_batch.raw_records)
142
- end
143
-
144
- test("DoubleArray") do
145
- records = [
146
- [-1.0],
147
- [nil],
148
- [1.0],
149
- ]
150
- record_batch = Arrow::RecordBatch.new({column: :double},
151
- records)
152
- assert_equal(records, record_batch.raw_records)
153
- end
154
-
155
- test("BinaryArray") do
156
- records = [
157
- ["\x00".b],
158
- [nil],
159
- ["\xff".b],
160
- ]
161
- record_batch = Arrow::RecordBatch.new({column: :binary},
162
- records)
163
- assert_equal(records, record_batch.raw_records)
164
- end
165
-
166
- test("StringArray") do
167
- records = [
168
- ["Ruby"],
169
- [nil],
170
- ["\u3042"], # U+3042 HIRAGANA LETTER A
171
- ]
172
- record_batch = Arrow::RecordBatch.new({column: :string},
173
- records)
174
- assert_equal(records, record_batch.raw_records)
175
- end
176
-
177
- test("Date32Array") do
178
- records = [
179
- [Date.new(1960, 1, 1)],
180
- [nil],
181
- [Date.new(2017, 8, 23)],
182
- ]
183
- record_batch = Arrow::RecordBatch.new({column: :date32},
184
- records)
185
- assert_equal(records, record_batch.raw_records)
186
- end
187
-
188
- test("Date64Array") do
189
- records = [
190
- [DateTime.new(1960, 1, 1, 2, 9, 30)],
191
- [nil],
192
- [DateTime.new(2017, 8, 23, 14, 57, 2)],
193
- ]
194
- record_batch = Arrow::RecordBatch.new({column: :date64},
195
- records)
196
- assert_equal(records, record_batch.raw_records)
197
- end
198
-
199
- sub_test_case("TimestampArray") do
200
- test("second") do
201
- records = [
202
- [Time.parse("1960-01-01T02:09:30Z")],
203
- [nil],
204
- [Time.parse("2017-08-23T14:57:02Z")],
205
- ]
206
- record_batch = Arrow::RecordBatch.new({
207
- column: {
208
- type: :timestamp,
209
- unit: :second,
210
- }
211
- },
212
- records)
213
- assert_equal(records, record_batch.raw_records)
214
- end
215
-
216
- test("milli") do
217
- records = [
218
- [Time.parse("1960-01-01T02:09:30.123Z")],
219
- [nil],
220
- [Time.parse("2017-08-23T14:57:02.987Z")],
221
- ]
222
- record_batch = Arrow::RecordBatch.new({
223
- column: {
224
- type: :timestamp,
225
- unit: :milli,
226
- }
227
- },
228
- records)
229
- assert_equal(records, record_batch.raw_records)
230
- end
231
-
232
- test("micro") do
233
- records = [
234
- [Time.parse("1960-01-01T02:09:30.123456Z")],
235
- [nil],
236
- [Time.parse("2017-08-23T14:57:02.987654Z")],
237
- ]
238
- record_batch = Arrow::RecordBatch.new({
239
- column: {
240
- type: :timestamp,
241
- unit: :micro,
242
- }
243
- },
244
- records)
245
- assert_equal(records, record_batch.raw_records)
246
- end
247
-
248
- test("nano") do
249
- records = [
250
- [Time.parse("1960-01-01T02:09:30.123456789Z")],
251
- [nil],
252
- [Time.parse("2017-08-23T14:57:02.987654321Z")],
253
- ]
254
- record_batch = Arrow::RecordBatch.new({
255
- column: {
256
- type: :timestamp,
257
- unit: :nano,
258
- }
259
- },
260
- records)
261
- assert_equal(records, record_batch.raw_records)
262
- end
263
- end
264
-
265
- sub_test_case("Time32Array") do
266
- test("second") do
267
- records = [
268
- [60 * 10], # 00:10:00
269
- [nil],
270
- [60 * 60 * 2 + 9], # 02:00:09
271
- ]
272
- record_batch = Arrow::RecordBatch.new({
273
- column: {
274
- type: :time32,
275
- unit: :second,
276
- }
277
- },
278
- records)
279
- assert_equal(records, record_batch.raw_records)
280
- end
281
-
282
- test("milli") do
283
- records = [
284
- [(60 * 10) * 1000 + 123], # 00:10:00.123
285
- [nil],
286
- [(60 * 60 * 2 + 9) * 1000 + 987], # 02:00:09.987
287
- ]
288
- record_batch = Arrow::RecordBatch.new({
289
- column: {
290
- type: :time32,
291
- unit: :milli,
292
- }
293
- },
294
- records)
295
- assert_equal(records, record_batch.raw_records)
296
- end
297
- end
298
-
299
- sub_test_case("Time64Array") do
300
- test("micro") do
301
- records = [
302
- [(60 * 10) * 1_000_000 + 123_456], # 00:10:00.123456
303
- [nil],
304
- [(60 * 60 * 2 + 9) * 1_000_000 + 987_654], # 02:00:09.987654
305
- ]
306
- record_batch = Arrow::RecordBatch.new({
307
- column: {
308
- type: :time64,
309
- unit: :micro,
310
- }
311
- },
312
- records)
313
- assert_equal(records, record_batch.raw_records)
314
- end
315
-
316
- test("nano") do
317
- records = [
318
- [(60 * 10) * 1_000_000_000 + 123_456_789], # 00:10:00.123456789
319
- [nil],
320
- [(60 * 60 * 2 + 9) * 1_000_000_000 + 987_654_321], # 02:00:09.987654321
321
- ]
322
- record_batch = Arrow::RecordBatch.new({
323
- column: {
324
- type: :time64,
325
- unit: :nano,
326
- }
327
- },
328
- records)
329
- assert_equal(records, record_batch.raw_records)
330
- end
331
- end
332
-
333
- test("Decimal128Array") do
334
- records = [
335
- [BigDecimal("92.92")],
336
- [nil],
337
- [BigDecimal("29.29")],
338
- ]
339
- record_batch = Arrow::RecordBatch.new({
340
- column: {
341
- type: :decimal128,
342
- precision: 8,
343
- scale: 2,
344
- }
345
- },
346
- records)
347
- assert_equal(records, record_batch.raw_records)
348
- end
349
- end