red-arrow 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +6 -6
- data/lib/arrow/array-builder.rb +101 -52
- data/lib/arrow/array.rb +28 -10
- data/lib/arrow/chunked-array.rb +2 -0
- data/lib/arrow/csv-loader.rb +5 -0
- data/lib/arrow/csv-read-options.rb +18 -0
- data/lib/arrow/data-type.rb +35 -2
- data/lib/arrow/decimal128-array-builder.rb +0 -2
- data/lib/arrow/field.rb +1 -1
- data/lib/arrow/generic-filterable.rb +43 -0
- data/lib/arrow/generic-takeable.rb +38 -0
- data/lib/arrow/list-data-type.rb +58 -8
- data/lib/arrow/loader.rb +9 -1
- data/lib/arrow/{binary-array-builder.rb → null-array.rb} +3 -15
- data/lib/arrow/record-batch.rb +0 -3
- data/lib/arrow/schema.rb +0 -2
- data/lib/arrow/struct-data-type.rb +0 -2
- data/lib/arrow/table-loader.rb +29 -6
- data/lib/arrow/table-saver.rb +29 -9
- data/lib/arrow/table.rb +14 -50
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +3 -1
- data/test/test-array-builder.rb +17 -0
- data/test/test-array.rb +102 -0
- data/test/test-chunked-array.rb +94 -0
- data/test/test-csv-loader.rb +2 -2
- data/test/test-data-type.rb +11 -0
- data/test/test-list-data-type.rb +27 -1
- data/test/test-null-array.rb +23 -0
- data/test/test-slicer.rb +74 -30
- data/test/test-table.rb +147 -14
- data/test/test-timestamp-array.rb +19 -0
- metadata +60 -55
data/lib/arrow/list-data-type.rb
CHANGED
@@ -53,16 +53,66 @@ module Arrow
|
|
53
53
|
#
|
54
54
|
# @example Create a list data type with field description
|
55
55
|
# Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
56
|
+
#
|
57
|
+
# @overload initialize(data_type)
|
58
|
+
#
|
59
|
+
# @param data_type [Arrow::DataType, String, Symbol,
|
60
|
+
# ::Array<String>, ::Array<Symbol>, Hash] The element data
|
61
|
+
# type of the list data type. A field is created with the
|
62
|
+
# default name `"item"` from the data type automatically.
|
63
|
+
#
|
64
|
+
# See {Arrow::DataType.resolve} for how to specify data type.
|
65
|
+
#
|
66
|
+
# @example Create a list data type with {Arrow::DataType}
|
67
|
+
# Arrow::ListDataType.new(Arrow::BooleanDataType.new)
|
68
|
+
#
|
69
|
+
# @example Create a list data type with data type name as String
|
70
|
+
# Arrow::ListDataType.new("boolean")
|
71
|
+
#
|
72
|
+
# @example Create a list data type with data type name as Symbol
|
73
|
+
# Arrow::ListDataType.new(:boolean)
|
74
|
+
#
|
75
|
+
# @example Create a list data type with data type as Array
|
76
|
+
# Arrow::ListDataType.new([:time32, :milli])
|
77
|
+
def initialize(arg)
|
78
|
+
data_type = resolve_data_type(arg)
|
79
|
+
if data_type
|
80
|
+
field = Field.new(default_field_name, data_type)
|
81
|
+
else
|
82
|
+
field = resolve_field(arg)
|
64
83
|
end
|
65
84
|
initialize_raw(field)
|
66
85
|
end
|
86
|
+
|
87
|
+
private
|
88
|
+
def resolve_data_type(arg)
|
89
|
+
case arg
|
90
|
+
when DataType, String, Symbol, ::Array
|
91
|
+
DataType.resolve(arg)
|
92
|
+
when Hash
|
93
|
+
return nil if arg[:name]
|
94
|
+
return nil unless arg[:type]
|
95
|
+
DataType.resolve(arg)
|
96
|
+
else
|
97
|
+
nil
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def default_field_name
|
102
|
+
"item"
|
103
|
+
end
|
104
|
+
|
105
|
+
def resolve_field(arg)
|
106
|
+
if arg.is_a?(Hash) and arg.key?(:field)
|
107
|
+
description = arg
|
108
|
+
arg = description[:field]
|
109
|
+
end
|
110
|
+
if arg.is_a?(Hash)
|
111
|
+
field_description = arg
|
112
|
+
Field.new(field_description)
|
113
|
+
else
|
114
|
+
arg
|
115
|
+
end
|
116
|
+
end
|
67
117
|
end
|
68
118
|
end
|
data/lib/arrow/loader.rb
CHANGED
@@ -32,9 +32,15 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def require_libraries
|
35
|
+
require "arrow/column-containable"
|
36
|
+
require "arrow/field-containable"
|
37
|
+
require "arrow/generic-filterable"
|
38
|
+
require "arrow/generic-takeable"
|
39
|
+
require "arrow/record-containable"
|
40
|
+
|
35
41
|
require "arrow/array"
|
36
42
|
require "arrow/array-builder"
|
37
|
-
require "arrow/
|
43
|
+
require "arrow/bigdecimal-extension"
|
38
44
|
require "arrow/chunked-array"
|
39
45
|
require "arrow/column"
|
40
46
|
require "arrow/compression-type"
|
@@ -53,8 +59,10 @@ module Arrow
|
|
53
59
|
require "arrow/dictionary-data-type"
|
54
60
|
require "arrow/field"
|
55
61
|
require "arrow/file-output-stream"
|
62
|
+
require "arrow/group"
|
56
63
|
require "arrow/list-array-builder"
|
57
64
|
require "arrow/list-data-type"
|
65
|
+
require "arrow/null-array"
|
58
66
|
require "arrow/null-array-builder"
|
59
67
|
require "arrow/path-extension"
|
60
68
|
require "arrow/record"
|
@@ -16,21 +16,9 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
module Arrow
|
19
|
-
class
|
20
|
-
def
|
21
|
-
|
22
|
-
is_valids.each_with_index do |is_valid, i|
|
23
|
-
if is_valid
|
24
|
-
append_value(values[i])
|
25
|
-
else
|
26
|
-
append_null
|
27
|
-
end
|
28
|
-
end
|
29
|
-
else
|
30
|
-
values.each do |value|
|
31
|
-
append_value(value)
|
32
|
-
end
|
33
|
-
end
|
19
|
+
class NullArray
|
20
|
+
def get_value(i)
|
21
|
+
nil
|
34
22
|
end
|
35
23
|
end
|
36
24
|
end
|
data/lib/arrow/record-batch.rb
CHANGED
data/lib/arrow/schema.rb
CHANGED
data/lib/arrow/table-loader.rb
CHANGED
@@ -41,6 +41,8 @@ module Arrow
|
|
41
41
|
available_formats << match_data.post_match
|
42
42
|
end
|
43
43
|
end
|
44
|
+
deprecated_formats = ["batch", "stream"]
|
45
|
+
available_formats -= deprecated_formats
|
44
46
|
message = "Arrow::Table load format must be one of ["
|
45
47
|
message << available_formats.join(", ")
|
46
48
|
message << "]: #{format.inspect}"
|
@@ -119,18 +121,30 @@ module Arrow
|
|
119
121
|
load_raw(input, reader)
|
120
122
|
end
|
121
123
|
|
122
|
-
|
124
|
+
# @since 1.0.0
|
125
|
+
def load_as_arrow_file
|
123
126
|
input = open_input_stream
|
124
127
|
reader = RecordBatchFileReader.new(input)
|
125
128
|
load_raw(input, reader)
|
126
129
|
end
|
127
130
|
|
128
|
-
|
131
|
+
# @deprecated Use `format: :arrow_file` instead.
|
132
|
+
def load_as_batch
|
133
|
+
load_as_arrow_file
|
134
|
+
end
|
135
|
+
|
136
|
+
# @since 1.0.0
|
137
|
+
def load_as_arrow_streaming
|
129
138
|
input = open_input_stream
|
130
139
|
reader = RecordBatchStreamReader.new(input)
|
131
140
|
load_raw(input, reader)
|
132
141
|
end
|
133
142
|
|
143
|
+
# @deprecated Use `format: :arrow_streaming` instead.
|
144
|
+
def load_as_stream
|
145
|
+
load_as_arrow_streaming
|
146
|
+
end
|
147
|
+
|
134
148
|
if Arrow.const_defined?(:ORCFileReader)
|
135
149
|
def load_as_orc
|
136
150
|
input = open_input_stream
|
@@ -143,16 +157,25 @@ module Arrow
|
|
143
157
|
end
|
144
158
|
end
|
145
159
|
|
146
|
-
def
|
147
|
-
options = @options.dup
|
160
|
+
def csv_load(options)
|
148
161
|
options.delete(:format)
|
149
162
|
if @input.is_a?(Buffer)
|
150
|
-
CSVLoader.load(@input.data.to_s, options)
|
163
|
+
CSVLoader.load(@input.data.to_s, **options)
|
151
164
|
else
|
152
|
-
CSVLoader.load(Pathname.new(@input), options)
|
165
|
+
CSVLoader.load(Pathname.new(@input), **options)
|
153
166
|
end
|
154
167
|
end
|
155
168
|
|
169
|
+
def load_as_csv
|
170
|
+
csv_load(@options.dup)
|
171
|
+
end
|
172
|
+
|
173
|
+
def load_as_tsv
|
174
|
+
options = @options.dup
|
175
|
+
options[:delimiter] = "\t"
|
176
|
+
csv_load(options.dup)
|
177
|
+
end
|
178
|
+
|
156
179
|
def load_as_feather
|
157
180
|
input = open_input_stream
|
158
181
|
reader = FeatherFileReader.new(input)
|
data/lib/arrow/table-saver.rb
CHANGED
@@ -42,6 +42,8 @@ module Arrow
|
|
42
42
|
available_formats << match_data.post_match
|
43
43
|
end
|
44
44
|
end
|
45
|
+
deprecated_formats = ["batch", "stream"]
|
46
|
+
available_formats -= deprecated_formats
|
45
47
|
message = "Arrow::Table save format must be one of ["
|
46
48
|
message << available_formats.join(", ")
|
47
49
|
message << "]: #{format.inspect}"
|
@@ -110,30 +112,48 @@ module Arrow
|
|
110
112
|
end
|
111
113
|
|
112
114
|
def save_as_arrow
|
113
|
-
|
115
|
+
save_as_arrow_file
|
114
116
|
end
|
115
117
|
|
116
|
-
|
118
|
+
# @since 1.0.0
|
119
|
+
def save_as_arrow_file
|
117
120
|
save_raw(RecordBatchFileWriter)
|
118
121
|
end
|
119
122
|
|
120
|
-
|
123
|
+
# @deprecated Use `format: :arrow_file` instead.
|
124
|
+
def save_as_batch
|
125
|
+
save_as_arrow_file
|
126
|
+
end
|
127
|
+
|
128
|
+
# @since 1.0.0
|
129
|
+
def save_as_arrow_streaming
|
121
130
|
save_raw(RecordBatchStreamWriter)
|
122
131
|
end
|
123
132
|
|
124
|
-
|
133
|
+
# @deprecated Use `format: :arrow_streaming` instead.
|
134
|
+
def save_as_stream
|
135
|
+
save_as_arrow_streaming
|
136
|
+
end
|
137
|
+
|
138
|
+
def csv_save(**options)
|
125
139
|
open_output_stream do |output|
|
126
|
-
csv = CSV.new(output)
|
140
|
+
csv = CSV.new(output, **options)
|
127
141
|
names = @table.schema.fields.collect(&:name)
|
128
142
|
csv << names
|
129
|
-
@table.
|
130
|
-
csv <<
|
131
|
-
record[name]
|
132
|
-
end
|
143
|
+
@table.raw_records.each do |record|
|
144
|
+
csv << record
|
133
145
|
end
|
134
146
|
end
|
135
147
|
end
|
136
148
|
|
149
|
+
def save_as_csv
|
150
|
+
csv_save
|
151
|
+
end
|
152
|
+
|
153
|
+
def save_as_tsv
|
154
|
+
csv_save(col_sep: "\t")
|
155
|
+
end
|
156
|
+
|
137
157
|
def save_as_feather
|
138
158
|
open_output_stream do |output|
|
139
159
|
FeatherFileWriter.open(output) do |writer|
|
data/lib/arrow/table.rb
CHANGED
@@ -15,13 +15,11 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
-
require "arrow/column-containable"
|
19
|
-
require "arrow/group"
|
20
|
-
require "arrow/record-containable"
|
21
|
-
|
22
18
|
module Arrow
|
23
19
|
class Table
|
24
20
|
include ColumnContainable
|
21
|
+
include GenericFilterable
|
22
|
+
include GenericTakeable
|
25
23
|
include RecordContainable
|
26
24
|
|
27
25
|
class << self
|
@@ -306,13 +304,13 @@ module Arrow
|
|
306
304
|
end
|
307
305
|
end
|
308
306
|
|
309
|
-
|
307
|
+
sliced_tables = []
|
310
308
|
slicers.each do |slicer|
|
311
309
|
slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
|
312
310
|
case slicer
|
313
311
|
when Integer
|
314
312
|
slicer += n_rows if slicer < 0
|
315
|
-
|
313
|
+
sliced_tables << slice_by_range(slicer, n_rows - 1)
|
316
314
|
when Range
|
317
315
|
original_from = from = slicer.first
|
318
316
|
to = slicer.last
|
@@ -325,17 +323,9 @@ module Arrow
|
|
325
323
|
raise ArgumentError, message
|
326
324
|
end
|
327
325
|
to += n_rows if to < 0
|
328
|
-
|
329
|
-
when ::Array
|
330
|
-
|
331
|
-
when ChunkedArray
|
332
|
-
offset = 0
|
333
|
-
slicer.each_chunk do |array|
|
334
|
-
boolean_array_to_slice_ranges(array, offset, ranges)
|
335
|
-
offset += array.length
|
336
|
-
end
|
337
|
-
when BooleanArray
|
338
|
-
boolean_array_to_slice_ranges(slicer, 0, ranges)
|
326
|
+
sliced_tables << slice_by_range(from, to)
|
327
|
+
when ::Array, BooleanArray, ChunkedArray
|
328
|
+
sliced_tables << filter(slicer)
|
339
329
|
else
|
340
330
|
message = "slicer must be Integer, Range, (from, to), " +
|
341
331
|
"Arrow::ChunkedArray of Arrow::BooleanArray, " +
|
@@ -343,7 +333,11 @@ module Arrow
|
|
343
333
|
raise ArgumentError, message
|
344
334
|
end
|
345
335
|
end
|
346
|
-
|
336
|
+
if sliced_tables.size > 1
|
337
|
+
sliced_tables[0].concatenate(sliced_tables[1..-1])
|
338
|
+
else
|
339
|
+
sliced_tables[0]
|
340
|
+
end
|
347
341
|
end
|
348
342
|
|
349
343
|
# TODO
|
@@ -514,38 +508,8 @@ module Arrow
|
|
514
508
|
end
|
515
509
|
|
516
510
|
private
|
517
|
-
def
|
518
|
-
|
519
|
-
target_start = nil
|
520
|
-
array.each_with_index do |is_target, i|
|
521
|
-
if is_target
|
522
|
-
unless in_target
|
523
|
-
target_start = offset + i
|
524
|
-
in_target = true
|
525
|
-
end
|
526
|
-
else
|
527
|
-
if in_target
|
528
|
-
ranges << [target_start, offset + i - 1]
|
529
|
-
target_start = nil
|
530
|
-
in_target = false
|
531
|
-
end
|
532
|
-
end
|
533
|
-
end
|
534
|
-
if in_target
|
535
|
-
ranges << [target_start, offset + array.length - 1]
|
536
|
-
end
|
537
|
-
end
|
538
|
-
|
539
|
-
def slice_by_ranges(ranges)
|
540
|
-
sliced_table = []
|
541
|
-
ranges.each do |from, to|
|
542
|
-
sliced_table << slice_raw(from, to - from + 1)
|
543
|
-
end
|
544
|
-
if sliced_table.size > 1
|
545
|
-
sliced_table[0].concatenate(sliced_table[1..-1])
|
546
|
-
else
|
547
|
-
sliced_table[0]
|
548
|
-
end
|
511
|
+
def slice_by_range(from, to)
|
512
|
+
slice_raw(from, to - from + 1)
|
549
513
|
end
|
550
514
|
|
551
515
|
def ensure_raw_column(name, data)
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -59,5 +59,7 @@ Gem::Specification.new do |spec|
|
|
59
59
|
spec.add_development_dependency("test-unit")
|
60
60
|
spec.add_development_dependency("yard")
|
61
61
|
|
62
|
-
|
62
|
+
required_msys2_package_version = version_components[0, 3].join(".")
|
63
|
+
spec.metadata["msys2_mingw_dependencies"] =
|
64
|
+
"arrow>=#{required_msys2_package_version}"
|
63
65
|
end
|
data/test/test-array-builder.rb
CHANGED
@@ -60,6 +60,23 @@ class ArrayBuilderTest < Test::Unit::TestCase
|
|
60
60
|
DateTime.new(2018, 1, 5, 0, 23, 21),
|
61
61
|
])
|
62
62
|
end
|
63
|
+
|
64
|
+
test("list<boolean>s") do
|
65
|
+
assert_build(Arrow::ArrayBuilder,
|
66
|
+
[
|
67
|
+
[nil, true, false],
|
68
|
+
nil,
|
69
|
+
[false],
|
70
|
+
])
|
71
|
+
end
|
72
|
+
|
73
|
+
test("list<string>s") do
|
74
|
+
assert_build(Arrow::ArrayBuilder,
|
75
|
+
[
|
76
|
+
["Hello", "World"],
|
77
|
+
["Apache Arrow"],
|
78
|
+
])
|
79
|
+
end
|
63
80
|
end
|
64
81
|
|
65
82
|
sub_test_case("specific builder") do
|