red-arrow 0.16.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +60 -30
  4. data/ext/arrow/extconf.rb +14 -3
  5. data/ext/arrow/raw-records.cpp +2 -1
  6. data/ext/arrow/values.cpp +2 -1
  7. data/lib/arrow/buffer.rb +28 -0
  8. data/lib/arrow/dictionary-array.rb +24 -0
  9. data/lib/arrow/generic-filterable.rb +7 -7
  10. data/lib/arrow/generic-takeable.rb +2 -2
  11. data/lib/arrow/loader.rb +3 -0
  12. data/lib/arrow/null-array-builder.rb +1 -1
  13. data/lib/arrow/raw-table-converter.rb +47 -0
  14. data/lib/arrow/record-batch-iterator.rb +22 -0
  15. data/lib/arrow/record-batch.rb +9 -1
  16. data/lib/arrow/schema.rb +5 -0
  17. data/lib/arrow/struct-array-builder.rb +13 -7
  18. data/lib/arrow/table-saver.rb +8 -4
  19. data/lib/arrow/table.rb +8 -25
  20. data/lib/arrow/version.rb +1 -1
  21. data/test/helper.rb +1 -0
  22. data/test/helper/omittable.rb +36 -0
  23. data/test/raw-records/test-dense-union-array.rb +1 -34
  24. data/test/raw-records/test-sparse-union-array.rb +1 -33
  25. data/test/run-test.rb +14 -3
  26. data/test/test-array.rb +5 -3
  27. data/test/test-buffer.rb +11 -0
  28. data/test/test-chunked-array.rb +5 -3
  29. data/test/test-dense-union-data-type.rb +2 -2
  30. data/test/test-dictionary-array.rb +41 -0
  31. data/test/test-feather.rb +21 -6
  32. data/test/test-record-batch-iterator.rb +37 -0
  33. data/test/test-record-batch.rb +14 -0
  34. data/test/test-schema.rb +16 -0
  35. data/test/test-sparse-union-data-type.rb +2 -2
  36. data/test/test-struct-array-builder.rb +8 -4
  37. data/test/test-table.rb +9 -3
  38. data/test/values/test-dense-union-array.rb +1 -34
  39. data/test/values/test-sparse-union-array.rb +1 -33
  40. metadata +68 -59
@@ -19,7 +19,7 @@ module Arrow
19
19
  class NullArrayBuilder
20
20
  class << self
21
21
  def buildable?(args)
22
- super and args.collect(&:class) != [Integer]
22
+ super and not (args.size == 1 and args[0].is_a?(Integer))
23
23
  end
24
24
  end
25
25
  end
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RawTableConverter
20
+ attr_reader :n_rows
21
+ attr_reader :schema
22
+ attr_reader :values
23
+ def initialize(raw_table)
24
+ @raw_table = raw_table
25
+ convert
26
+ end
27
+
28
+ private
29
+ def convert
30
+ if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
31
+ fields = @raw_table.collect(&:field)
32
+ @schema = Schema.new(fields)
33
+ @values = @raw_table.collect(&:data)
34
+ else
35
+ fields = []
36
+ @values = []
37
+ @raw_table.each do |name, array|
38
+ array = ArrayBuilder.build(array) if array.is_a?(::Array)
39
+ fields << Field.new(name.to_s, array.value_data_type)
40
+ @values << array
41
+ end
42
+ @schema = Schema.new(fields)
43
+ end
44
+ @n_rows = @values[0].length
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,22 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchIterator
20
+ alias_method :to_a, :to_list
21
+ end
22
+ end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class RecordBatch
20
22
  include ColumnContainable
@@ -25,13 +27,19 @@ module Arrow
25
27
  def new(*args)
26
28
  n_args = args.size
27
29
  case n_args
30
+ when 1
31
+ raw_table_converter = RawTableConverter.new(args[0])
32
+ n_rows = raw_table_converter.n_rows
33
+ schema = raw_table_converter.schema
34
+ values = raw_table_converter.values
35
+ super(schema, n_rows, values)
28
36
  when 2
29
37
  schema, data = args
30
38
  RecordBatchBuilder.build(schema, data)
31
39
  when 3
32
40
  super
33
41
  else
34
- message = "wrong number of arguments (given #{n_args}, expected 2..3)"
42
+ message = "wrong number of arguments (given #{n_args}, expected 1..3)"
35
43
  raise ArgumentError, message
36
44
  end
37
45
  end
@@ -91,5 +91,10 @@ module Arrow
91
91
  end
92
92
 
93
93
  alias_method :[], :find_field
94
+
95
+ alias_method :to_s_raw, :to_s
96
+ def to_s(show_metadata: false)
97
+ to_string_metadata(show_metadata)
98
+ end
94
99
  end
95
100
  end
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end
@@ -155,10 +155,14 @@ module Arrow
155
155
  end
156
156
 
157
157
  def save_as_feather
158
- open_output_stream do |output|
159
- FeatherFileWriter.open(output) do |writer|
160
- writer.write(@table)
161
- end
158
+ properties = FeatherWriteProperties.new
159
+ properties.class.properties.each do |name|
160
+ value = @options[name.to_sym]
161
+ next if value.nil?
162
+ properties.__send__("#{name}=", value)
163
+ end
164
+ open_raw_output_stream do |output|
165
+ @table.write_as_feather(output, properties)
162
166
  end
163
167
  end
164
168
  end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class Table
20
22
  include ColumnContainable
@@ -81,14 +83,6 @@ module Arrow
81
83
  # `Array`.
82
84
  #
83
85
  # @example Create a table from column name and values
84
- # count_chunks = [
85
- # Arrow::UInt32Array.new([0, 2]),
86
- # Arrow::UInt32Array.new([nil, 4]),
87
- # ]
88
- # visible_chunks = [
89
- # Arrow::BooleanArray.new([true]),
90
- # Arrow::BooleanArray.new([nil, nil, false]),
91
- # ]
92
86
  # Arrow::Table.new("count" => [0, 2, nil, 4],
93
87
  # "visible" => [true, nil, nil, false])
94
88
  #
@@ -169,22 +163,9 @@ module Arrow
169
163
  n_args = args.size
170
164
  case n_args
171
165
  when 1
172
- if args[0][0].is_a?(Column)
173
- columns = args[0]
174
- fields = columns.collect(&:field)
175
- values = columns.collect(&:data)
176
- schema = Schema.new(fields)
177
- else
178
- raw_table = args[0]
179
- fields = []
180
- values = []
181
- raw_table.each do |name, array|
182
- array = ArrayBuilder.build(array) if array.is_a?(::Array)
183
- fields << Field.new(name.to_s, array.value_data_type)
184
- values << array
185
- end
186
- schema = Schema.new(fields)
187
- end
166
+ raw_table_converter = RawTableConverter.new(args[0])
167
+ schema = raw_table_converter.schema
168
+ values = raw_table_converter.values
188
169
  when 2
189
170
  schema = args[0]
190
171
  schema = Schema.new(schema) unless schema.is_a?(Schema)
@@ -304,6 +285,8 @@ module Arrow
304
285
  end
305
286
  end
306
287
 
288
+ filter_options = Arrow::FilterOptions.new
289
+ filter_options.null_selection_behavior = :emit_null
307
290
  sliced_tables = []
308
291
  slicers.each do |slicer|
309
292
  slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
@@ -325,7 +308,7 @@ module Arrow
325
308
  to += n_rows if to < 0
326
309
  sliced_tables << slice_by_range(from, to)
327
310
  when ::Array, BooleanArray, ChunkedArray
328
- sliced_tables << filter(slicer)
311
+ sliced_tables << filter(slicer, filter_options)
329
312
  else
330
313
  message = "slicer must be Integer, Range, (from, to), " +
331
314
  "Arrow::ChunkedArray of Arrow::BooleanArray, " +
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "0.16.0"
19
+ VERSION = "2.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
@@ -24,3 +24,4 @@ require "zlib"
24
24
  require "test-unit"
25
25
 
26
26
  require_relative "helper/fixture"
27
+ require_relative "helper/omittable"
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Helper
19
+ module Omittable
20
+ def require_gi_bindings(major, minor, micro)
21
+ return if GLib.check_binding_version?(major, minor, micro)
22
+ message =
23
+ "Require gobject-introspection #{major}.#{minor}.#{micro} or later: " +
24
+ GLib::BINDING_VERSION.join(".")
25
+ omit(message)
26
+ end
27
+
28
+ def require_gi(major, minor, micro)
29
+ return if GObjectIntrospection::Version.or_later?(major, minor, micro)
30
+ message =
31
+ "Require GObject Introspection #{major}.#{minor}.#{micro} or later: " +
32
+ GObjectIntrospection::Version::STRING
33
+ omit(message)
34
+ end
35
+ end
36
+ end
@@ -56,10 +56,7 @@ module RawRecordsDenseUnionArrayTests
56
56
  end
57
57
  records.each do |record|
58
58
  column = record[0]
59
- if column.nil?
60
- type_ids << nil
61
- offsets << 0
62
- elsif column.key?("0")
59
+ if column.key?("0")
63
60
  type_id = type_codes[0]
64
61
  type_ids << type_id
65
62
  offsets << (type_ids.count(type_id) - 1)
@@ -82,7 +79,6 @@ module RawRecordsDenseUnionArrayTests
82
79
  def test_null
83
80
  records = [
84
81
  [{"0" => nil}],
85
- [nil],
86
82
  ]
87
83
  target = build(:null, records)
88
84
  assert_equal(records, target.raw_records)
@@ -91,7 +87,6 @@ module RawRecordsDenseUnionArrayTests
91
87
  def test_boolean
92
88
  records = [
93
89
  [{"0" => true}],
94
- [nil],
95
90
  [{"1" => nil}],
96
91
  ]
97
92
  target = build(:boolean, records)
@@ -101,7 +96,6 @@ module RawRecordsDenseUnionArrayTests
101
96
  def test_int8
102
97
  records = [
103
98
  [{"0" => -(2 ** 7)}],
104
- [nil],
105
99
  [{"1" => nil}],
106
100
  ]
107
101
  target = build(:int8, records)
@@ -111,7 +105,6 @@ module RawRecordsDenseUnionArrayTests
111
105
  def test_uint8
112
106
  records = [
113
107
  [{"0" => (2 ** 8) - 1}],
114
- [nil],
115
108
  [{"1" => nil}],
116
109
  ]
117
110
  target = build(:uint8, records)
@@ -121,7 +114,6 @@ module RawRecordsDenseUnionArrayTests
121
114
  def test_int16
122
115
  records = [
123
116
  [{"0" => -(2 ** 15)}],
124
- [nil],
125
117
  [{"1" => nil}],
126
118
  ]
127
119
  target = build(:int16, records)
@@ -131,7 +123,6 @@ module RawRecordsDenseUnionArrayTests
131
123
  def test_uint16
132
124
  records = [
133
125
  [{"0" => (2 ** 16) - 1}],
134
- [nil],
135
126
  [{"1" => nil}],
136
127
  ]
137
128
  target = build(:uint16, records)
@@ -141,7 +132,6 @@ module RawRecordsDenseUnionArrayTests
141
132
  def test_int32
142
133
  records = [
143
134
  [{"0" => -(2 ** 31)}],
144
- [nil],
145
135
  [{"1" => nil}],
146
136
  ]
147
137
  target = build(:int32, records)
@@ -151,7 +141,6 @@ module RawRecordsDenseUnionArrayTests
151
141
  def test_uint32
152
142
  records = [
153
143
  [{"0" => (2 ** 32) - 1}],
154
- [nil],
155
144
  [{"1" => nil}],
156
145
  ]
157
146
  target = build(:uint32, records)
@@ -161,7 +150,6 @@ module RawRecordsDenseUnionArrayTests
161
150
  def test_int64
162
151
  records = [
163
152
  [{"0" => -(2 ** 63)}],
164
- [nil],
165
153
  [{"1" => nil}],
166
154
  ]
167
155
  target = build(:int64, records)
@@ -171,7 +159,6 @@ module RawRecordsDenseUnionArrayTests
171
159
  def test_uint64
172
160
  records = [
173
161
  [{"0" => (2 ** 64) - 1}],
174
- [nil],
175
162
  [{"1" => nil}],
176
163
  ]
177
164
  target = build(:uint64, records)
@@ -181,7 +168,6 @@ module RawRecordsDenseUnionArrayTests
181
168
  def test_float
182
169
  records = [
183
170
  [{"0" => -1.0}],
184
- [nil],
185
171
  [{"1" => nil}],
186
172
  ]
187
173
  target = build(:float, records)
@@ -191,7 +177,6 @@ module RawRecordsDenseUnionArrayTests
191
177
  def test_double
192
178
  records = [
193
179
  [{"0" => -1.0}],
194
- [nil],
195
180
  [{"1" => nil}],
196
181
  ]
197
182
  target = build(:double, records)
@@ -201,7 +186,6 @@ module RawRecordsDenseUnionArrayTests
201
186
  def test_binary
202
187
  records = [
203
188
  [{"0" => "\xff".b}],
204
- [nil],
205
189
  [{"1" => nil}],
206
190
  ]
207
191
  target = build(:binary, records)
@@ -211,7 +195,6 @@ module RawRecordsDenseUnionArrayTests
211
195
  def test_string
212
196
  records = [
213
197
  [{"0" => "Ruby"}],
214
- [nil],
215
198
  [{"1" => nil}],
216
199
  ]
217
200
  target = build(:string, records)
@@ -221,7 +204,6 @@ module RawRecordsDenseUnionArrayTests
221
204
  def test_date32
222
205
  records = [
223
206
  [{"0" => Date.new(1960, 1, 1)}],
224
- [nil],
225
207
  [{"1" => nil}],
226
208
  ]
227
209
  target = build(:date32, records)
@@ -231,7 +213,6 @@ module RawRecordsDenseUnionArrayTests
231
213
  def test_date64
232
214
  records = [
233
215
  [{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
234
- [nil],
235
216
  [{"1" => nil}],
236
217
  ]
237
218
  target = build(:date64, records)
@@ -241,7 +222,6 @@ module RawRecordsDenseUnionArrayTests
241
222
  def test_timestamp_second
242
223
  records = [
243
224
  [{"0" => Time.parse("1960-01-01T02:09:30Z")}],
244
- [nil],
245
225
  [{"1" => nil}],
246
226
  ]
247
227
  target = build({
@@ -255,7 +235,6 @@ module RawRecordsDenseUnionArrayTests
255
235
  def test_timestamp_milli
256
236
  records = [
257
237
  [{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
258
- [nil],
259
238
  [{"1" => nil}],
260
239
  ]
261
240
  target = build({
@@ -269,7 +248,6 @@ module RawRecordsDenseUnionArrayTests
269
248
  def test_timestamp_micro
270
249
  records = [
271
250
  [{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
272
- [nil],
273
251
  [{"1" => nil}],
274
252
  ]
275
253
  target = build({
@@ -283,7 +261,6 @@ module RawRecordsDenseUnionArrayTests
283
261
  def test_timestamp_nano
284
262
  records = [
285
263
  [{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
286
- [nil],
287
264
  [{"1" => nil}],
288
265
  ]
289
266
  target = build({
@@ -299,7 +276,6 @@ module RawRecordsDenseUnionArrayTests
299
276
  records = [
300
277
  # 00:10:00
301
278
  [{"0" => Arrow::Time.new(unit, 60 * 10)}],
302
- [nil],
303
279
  [{"1" => nil}],
304
280
  ]
305
281
  target = build({
@@ -315,7 +291,6 @@ module RawRecordsDenseUnionArrayTests
315
291
  records = [
316
292
  # 00:10:00.123
317
293
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
318
- [nil],
319
294
  [{"1" => nil}],
320
295
  ]
321
296
  target = build({
@@ -331,7 +306,6 @@ module RawRecordsDenseUnionArrayTests
331
306
  records = [
332
307
  # 00:10:00.123456
333
308
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
334
- [nil],
335
309
  [{"1" => nil}],
336
310
  ]
337
311
  target = build({
@@ -347,7 +321,6 @@ module RawRecordsDenseUnionArrayTests
347
321
  records = [
348
322
  # 00:10:00.123456789
349
323
  [{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)}],
350
- [nil],
351
324
  [{"1" => nil}],
352
325
  ]
353
326
  target = build({
@@ -361,7 +334,6 @@ module RawRecordsDenseUnionArrayTests
361
334
  def test_decimal128
362
335
  records = [
363
336
  [{"0" => BigDecimal("92.92")}],
364
- [nil],
365
337
  [{"1" => nil}],
366
338
  ]
367
339
  target = build({
@@ -376,7 +348,6 @@ module RawRecordsDenseUnionArrayTests
376
348
  def test_list
377
349
  records = [
378
350
  [{"0" => [true, nil, false]}],
379
- [nil],
380
351
  [{"1" => nil}],
381
352
  ]
382
353
  target = build({
@@ -393,7 +364,6 @@ module RawRecordsDenseUnionArrayTests
393
364
  def test_struct
394
365
  records = [
395
366
  [{"0" => {"sub_field" => true}}],
396
- [nil],
397
367
  [{"1" => nil}],
398
368
  [{"0" => {"sub_field" => nil}}],
399
369
  ]
@@ -414,7 +384,6 @@ module RawRecordsDenseUnionArrayTests
414
384
  omit("Need to add support for SparseUnionArrayBuilder")
415
385
  records = [
416
386
  [{"0" => {"field1" => true}}],
417
- [nil],
418
387
  [{"1" => nil}],
419
388
  [{"0" => {"field2" => nil}}],
420
389
  ]
@@ -440,7 +409,6 @@ module RawRecordsDenseUnionArrayTests
440
409
  omit("Need to add support for DenseUnionArrayBuilder")
441
410
  records = [
442
411
  [{"0" => {"field1" => true}}],
443
- [nil],
444
412
  [{"1" => nil}],
445
413
  [{"0" => {"field2" => nil}}],
446
414
  ]
@@ -466,7 +434,6 @@ module RawRecordsDenseUnionArrayTests
466
434
  omit("Need to add support for DictionaryArrayBuilder")
467
435
  records = [
468
436
  [{"0" => "Ruby"}],
469
- [nil],
470
437
  [{"1" => nil}],
471
438
  [{"0" => "GLib"}],
472
439
  ]