red-arrow 4.0.1 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +12 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/datum.rb +102 -0
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +58 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +119 -44
- data/lib/arrow/table-saver.rb +36 -5
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +112 -40
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -9
- data/test/helper.rb +3 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +34 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +210 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +376 -56
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +117 -168
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class TableConcatenateOptions
    class << self
      # Tries to build a TableConcatenateOptions from +value+.
      #
      # When +value+ is a Hash, a new instance is created and every
      # entry is applied by calling the `#{key}=` writer with that
      # entry's value. Any other object is not convertible.
      #
      # @param value [Object] The object to convert.
      #
      # @return [TableConcatenateOptions, nil] A new instance populated
      #   from +value+ when it is a Hash, `nil` otherwise.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |name, option_value|
            # Pass the entry's own value to the writer, not the whole Hash.
            options.public_send("#{name}=", option_value)
          end
          options
        else
          nil
        end
      end
    end
  end
end
|
@@ -18,6 +18,125 @@
|
|
18
18
|
module Arrow
|
19
19
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
20
20
|
class TableFormatter
|
21
|
+
# Formats the name and the values of a single column.
#
# The head and tail values handed to the constructor are kept as the
# sample values that are used to compute display widths.
#
# @private
class ColumnFormatter
  attr_reader :column
  attr_reader :head_values
  attr_reader :tail_values
  attr_reader :sample_values

  # @param column [Object] An object that responds to `name` and
  #   `data_type`.
  # @param head_values [Array] The leading values of the column.
  # @param tail_values [Array] The trailing values of the column.
  def initialize(column, head_values, tail_values)
    @column = column
    @head_values = head_values
    @tail_values = tail_values
    @sample_values = head_values + tail_values
    # Cache of computed widths keyed by struct field.
    @field_value_widths = {}
  end

  # @return [Object] The (memoized) data type of the column.
  def data_type
    @data_type ||= @column.data_type
  end

  # @return [String] The (memoized) name of the column.
  def name
    @name ||= @column.name
  end

  # @return [String] The (memoized) column name padded to the width
  #   computed from the data type and the sample values.
  def aligned_name
    @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
  end

  FLOAT_N_DIGITS = 10
  FORMATTED_NULL = "(null)"

  # Formats one value as text.
  #
  # @param value [Object] The value to format.
  # @param width [Integer] The minimum width of the result. It is
  #   ignored for `::Time` values.
  #
  # @return [String] The formatted value. Numbers and `nil` are
  #   right-aligned, Hash and other values are left-aligned.
  def format_value(value, width=0)
    case value
    when ::Time
      value.iso8601
    when Float
      "%*f" % [[width, FLOAT_N_DIGITS].max, value]
    when Integer
      "%*d" % [width, value]
    when Hash
      # Fields are taken from this column's data type and each field
      # value is padded to the width computed from the sample values.
      formatted_values = data_type.fields.collect do |field|
        field_name = field.name
        field_value_width = compute_field_value_width(field, @sample_values)
        formatted_name = format_value(field_name, 0)
        formatted_value = format_value(value[field_name], field_value_width)
        "#{formatted_name}: #{formatted_value}"
      end
      formatted = "{#{formatted_values.join(", ")}}"
      "%-*s" % [width, formatted]
    when nil
      "%*s" % [width, FORMATTED_NULL]
    else
      "%-*s" % [width, value.to_s]
    end
  end

  private
  # Computes (and caches per field) the display width of the values of
  # a struct field. `nil` sample values are treated as empty Hashes.
  def compute_field_value_width(field, sample_values)
    unless @field_value_widths.key?(field)
      field_name = field.name
      field_sample_values = sample_values.collect do |v|
        (v || {})[field_name]
      end
      # Aligning an empty name yields a string of exactly the value width.
      field_aligned_name = format_aligned_name("",
                                               field.data_type,
                                               field_sample_values)
      @field_value_widths[field] = field_aligned_name.size
    end
    @field_value_widths[field]
  end

  # Right-aligns `name` to the width needed to display values of
  # `data_type`. Other data types return `name` unchanged.
  def format_aligned_name(name, data_type, sample_values)
    case data_type
    when TimestampDataType
      "%*s" % [::Time.now.iso8601.size, name]
    when IntegerDataType
      have_null = false
      have_negative = false
      max_value = nil
      sample_values.each do |value|
        if value.nil?
          have_null = true
        else
          if max_value.nil?
            max_value = value.abs
          else
            max_value = [value.abs, max_value].max
          end
          have_negative = true if value.negative?
        end
      end
      if max_value.nil?
        width = 0
      else
        # Count decimal digits exactly. Going through Math.log10 uses
        # Float and can over-count for large integers.
        width = max_value.to_s.size
      end
      width += 1 if have_negative # Need "-"
      width = [width, FORMATTED_NULL.size].max if have_null
      "%*s" % [width, name]
    when FloatDataType, DoubleDataType
      "%*s" % [FLOAT_N_DIGITS, name]
    when StructDataType
      field_widths = data_type.fields.collect do |field|
        field_value_width = compute_field_value_width(field, sample_values)
        field.name.size + ": ".size + field_value_width
      end
      width = "{}".size + field_widths.sum
      if field_widths.size > 0
        width += (", ".size * (field_widths.size - 1))
      end
      "%*s" % [width, name]
    else
      name
    end
  end
end
|
139
|
+
|
21
140
|
def initialize(table, options={})
|
22
141
|
@table = table
|
23
142
|
@options = options
|
@@ -25,38 +144,43 @@ module Arrow
|
|
25
144
|
|
26
145
|
def format
|
27
146
|
text = ""
|
28
|
-
columns = @table.columns
|
29
|
-
format_header(text, columns)
|
30
|
-
|
31
147
|
n_rows = @table.n_rows
|
32
|
-
return text if n_rows.zero?
|
33
|
-
|
34
148
|
border = @options[:border] || 10
|
35
|
-
|
149
|
+
|
36
150
|
head_limit = [border, n_rows].min
|
37
|
-
|
38
|
-
|
151
|
+
|
152
|
+
tail_start = [border, n_rows - border].max
|
153
|
+
tail_limit = n_rows - tail_start
|
154
|
+
|
155
|
+
column_formatters = @table.columns.collect do |column|
|
156
|
+
head_values = column.each.take(head_limit)
|
157
|
+
if tail_limit > 0
|
158
|
+
tail_values = column.reverse_each.take(tail_limit).reverse
|
159
|
+
else
|
160
|
+
tail_values = []
|
161
|
+
end
|
162
|
+
ColumnFormatter.new(column, head_values, tail_values)
|
39
163
|
end
|
164
|
+
|
165
|
+
format_header(text, column_formatters)
|
166
|
+
return text if n_rows.zero?
|
167
|
+
|
168
|
+
n_digits = (Math.log10(n_rows) + 1).truncate
|
40
169
|
format_rows(text,
|
41
|
-
|
42
|
-
|
170
|
+
column_formatters,
|
171
|
+
column_formatters.collect(&:head_values).transpose,
|
43
172
|
n_digits,
|
44
173
|
0)
|
45
174
|
return text if n_rows <= border
|
46
175
|
|
47
|
-
tail_start = [border, n_rows - border].max
|
48
|
-
tail_limit = n_rows - tail_start
|
49
|
-
tail_column_values = columns.collect do |column|
|
50
|
-
column.reverse_each.take(tail_limit).reverse
|
51
|
-
end
|
52
176
|
|
53
177
|
if head_limit != tail_start
|
54
178
|
format_ellipsis(text)
|
55
179
|
end
|
56
180
|
|
57
181
|
format_rows(text,
|
58
|
-
|
59
|
-
|
182
|
+
column_formatters,
|
183
|
+
column_formatters.collect(&:tail_values).transpose,
|
60
184
|
n_digits,
|
61
185
|
tail_start)
|
62
186
|
|
@@ -22,12 +22,14 @@ module Arrow
|
|
22
22
|
def format_header(text, columns)
|
23
23
|
end
|
24
24
|
|
25
|
-
def format_rows(text,
|
25
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
26
26
|
rows.each_with_index do |row, nth_row|
|
27
27
|
text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
|
28
28
|
row.each_with_index do |column_value, nth_column|
|
29
|
-
|
30
|
-
|
29
|
+
column_formatter = column_formatters[nth_column]
|
30
|
+
formatted_name = column_formatter.name
|
31
|
+
formatted_value = column_formatter.format_value(column_value)
|
32
|
+
text << "#{formatted_name}: #{formatted_value}\n"
|
31
33
|
end
|
32
34
|
end
|
33
35
|
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -15,6 +15,8 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
+
require "open-uri"
|
19
|
+
|
18
20
|
module Arrow
|
19
21
|
class TableLoader
|
20
22
|
class << self
|
@@ -31,6 +33,48 @@ module Arrow
|
|
31
33
|
end
|
32
34
|
|
33
35
|
def load
|
36
|
+
if @input.is_a?(URI)
|
37
|
+
custom_load_method_candidates = []
|
38
|
+
if @input.scheme
|
39
|
+
custom_load_method_candidates << "load_from_uri_#{@input.scheme}"
|
40
|
+
end
|
41
|
+
custom_load_method_candidates << "load_from_uri"
|
42
|
+
elsif @input.is_a?(String) and ::File.directory?(@input)
|
43
|
+
custom_load_method_candidates = ["load_from_directory"]
|
44
|
+
else
|
45
|
+
custom_load_method_candidates = ["load_from_file"]
|
46
|
+
end
|
47
|
+
custom_load_method_candidates.each do |custom_load_method|
|
48
|
+
next unless respond_to?(custom_load_method, true)
|
49
|
+
return __send__(custom_load_method)
|
50
|
+
end
|
51
|
+
available_schemes = []
|
52
|
+
(methods(true) | private_methods(true)).each do |name|
|
53
|
+
match_data = /\Aload_from_/.match(name.to_s)
|
54
|
+
if match_data
|
55
|
+
available_schemes << match_data.post_match
|
56
|
+
end
|
57
|
+
end
|
58
|
+
message = "Arrow::Table load source must be one of ["
|
59
|
+
message << available_schemes.join(", ")
|
60
|
+
message << "]: #{@input.inspect}"
|
61
|
+
raise ArgumentError, message
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
def load_from_uri_http
|
66
|
+
load_by_reader
|
67
|
+
end
|
68
|
+
|
69
|
+
def load_from_uri_https
|
70
|
+
load_by_reader
|
71
|
+
end
|
72
|
+
|
73
|
+
def load_from_file
|
74
|
+
load_by_reader
|
75
|
+
end
|
76
|
+
|
77
|
+
def load_by_reader
|
34
78
|
format = @options[:format]
|
35
79
|
custom_load_method = "load_as_#{format}"
|
36
80
|
unless respond_to?(custom_load_method, true)
|
@@ -56,21 +100,24 @@ module Arrow
|
|
56
100
|
end
|
57
101
|
end
|
58
102
|
|
59
|
-
private
|
60
103
|
def fill_options
|
61
104
|
if @options[:format] and @options.key?(:compression)
|
62
105
|
return
|
63
106
|
end
|
64
107
|
|
65
|
-
|
108
|
+
case @input
|
109
|
+
when Buffer
|
66
110
|
info = {}
|
111
|
+
when URI
|
112
|
+
extension = PathExtension.new(@input.path)
|
113
|
+
info = extension.extract
|
67
114
|
else
|
68
115
|
extension = PathExtension.new(@input)
|
69
116
|
info = extension.extract
|
70
117
|
end
|
71
118
|
format = info[:format]
|
72
119
|
@options = @options.dup
|
73
|
-
if format
|
120
|
+
if format
|
74
121
|
@options[:format] ||= format.to_sym
|
75
122
|
else
|
76
123
|
@options[:format] ||= :arrow
|
@@ -81,10 +128,29 @@ module Arrow
|
|
81
128
|
end
|
82
129
|
|
83
130
|
def open_input_stream
|
84
|
-
|
85
|
-
|
131
|
+
case @input
|
132
|
+
when Buffer
|
133
|
+
yield(BufferInputStream.new(@input))
|
134
|
+
when URI
|
135
|
+
@input.open do |ruby_input|
|
136
|
+
case @options[:format]
|
137
|
+
when :stream, :arrow_streaming
|
138
|
+
Gio::RubyInputStream.open(ruby_input) do |gio_input|
|
139
|
+
GIOInputStream.open(gio_input) do |input|
|
140
|
+
yield(input)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
else
|
144
|
+
# TODO: We need to consider Ruby's GVL carefully to use
|
145
|
+
# Ruby object directly for input with other formats. We
|
146
|
+
# read data and use it as Buffer for now.
|
147
|
+
data = GLib::Bytes.new(ruby_input.read.freeze)
|
148
|
+
buffer = Buffer.new(data)
|
149
|
+
yield(BufferInputStream.new(buffer))
|
150
|
+
end
|
151
|
+
end
|
86
152
|
else
|
87
|
-
MemoryMappedInputStream.new(@input)
|
153
|
+
yield(MemoryMappedInputStream.new(@input))
|
88
154
|
end
|
89
155
|
end
|
90
156
|
|
@@ -100,32 +166,19 @@ module Arrow
|
|
100
166
|
end
|
101
167
|
|
102
168
|
def load_as_arrow
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
RecordBatchFileReader,
|
108
|
-
RecordBatchStreamReader,
|
109
|
-
]
|
110
|
-
reader_class_candidates.each do |reader_class_candidate|
|
111
|
-
input = open_input_stream
|
112
|
-
begin
|
113
|
-
reader = reader_class_candidate.new(input)
|
114
|
-
rescue Arrow::Error
|
115
|
-
error = $!
|
116
|
-
else
|
117
|
-
break
|
118
|
-
end
|
169
|
+
begin
|
170
|
+
load_as_arrow_file
|
171
|
+
rescue
|
172
|
+
load_as_arrows
|
119
173
|
end
|
120
|
-
raise error if reader.nil?
|
121
|
-
load_raw(input, reader)
|
122
174
|
end
|
123
175
|
|
124
176
|
# @since 1.0.0
|
125
177
|
def load_as_arrow_file
|
126
|
-
|
127
|
-
|
128
|
-
|
178
|
+
open_input_stream do |input|
|
179
|
+
reader = RecordBatchFileReader.new(input)
|
180
|
+
load_raw(input, reader)
|
181
|
+
end
|
129
182
|
end
|
130
183
|
|
131
184
|
# @deprecated Use `format: :arrow_file` instead.
|
@@ -133,34 +186,46 @@ module Arrow
|
|
133
186
|
load_as_arrow_file
|
134
187
|
end
|
135
188
|
|
189
|
+
# @since 7.0.0
|
190
|
+
def load_as_arrows
|
191
|
+
open_input_stream do |input|
|
192
|
+
reader = RecordBatchStreamReader.new(input)
|
193
|
+
load_raw(input, reader)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
136
197
|
# @since 1.0.0
|
137
198
|
def load_as_arrow_streaming
|
138
|
-
|
139
|
-
reader = RecordBatchStreamReader.new(input)
|
140
|
-
load_raw(input, reader)
|
199
|
+
load_as_arrows
|
141
200
|
end
|
142
201
|
|
143
202
|
# @deprecated Use `format: :arrow_streaming` instead.
|
144
203
|
def load_as_stream
|
145
|
-
|
204
|
+
load_as_arrows
|
146
205
|
end
|
147
206
|
|
148
207
|
if Arrow.const_defined?(:ORCFileReader)
|
149
208
|
def load_as_orc
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
209
|
+
open_input_stream do |input|
|
210
|
+
reader = ORCFileReader.new(input)
|
211
|
+
field_indexes = @options[:field_indexes]
|
212
|
+
reader.set_field_indexes(field_indexes) if field_indexes
|
213
|
+
table = reader.read_stripes
|
214
|
+
table.instance_variable_set(:@input, input)
|
215
|
+
table
|
216
|
+
end
|
157
217
|
end
|
158
218
|
end
|
159
219
|
|
160
220
|
def csv_load(options)
|
161
221
|
options.delete(:format)
|
162
|
-
|
222
|
+
case @input
|
223
|
+
when Buffer
|
163
224
|
CSVLoader.load(@input.data.to_s, **options)
|
225
|
+
when URI
|
226
|
+
@input.open do |input|
|
227
|
+
CSVLoader.load(input.read, **options)
|
228
|
+
end
|
164
229
|
else
|
165
230
|
CSVLoader.load(Pathname.new(@input), **options)
|
166
231
|
end
|
@@ -177,11 +242,21 @@ module Arrow
|
|
177
242
|
end
|
178
243
|
|
179
244
|
def load_as_feather
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
245
|
+
open_input_stream do |input|
|
246
|
+
reader = FeatherFileReader.new(input)
|
247
|
+
table = reader.read
|
248
|
+
table.instance_variable_set(:@input, input)
|
249
|
+
table
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
def load_as_json
|
254
|
+
open_input_stream do |input|
|
255
|
+
reader = JSONReader.new(input)
|
256
|
+
table = reader.read
|
257
|
+
table.instance_variable_set(:@input, input)
|
258
|
+
table
|
259
|
+
end
|
185
260
|
end
|
186
261
|
end
|
187
262
|
end
|
data/lib/arrow/table-saver.rb
CHANGED
@@ -32,6 +32,29 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def save
|
35
|
+
if @output.is_a?(URI)
|
36
|
+
custom_save_method = "save_to_uri"
|
37
|
+
else
|
38
|
+
custom_save_method = "save_to_file"
|
39
|
+
end
|
40
|
+
unless respond_to?(custom_save_method, true)
|
41
|
+
available_schemes = []
|
42
|
+
(methods(true) | private_methods(true)).each do |name|
|
43
|
+
match_data = /\Asave_to_/.match(name.to_s)
|
44
|
+
if match_data
|
45
|
+
available_schemes << match_data.post_match
|
46
|
+
end
|
47
|
+
end
|
48
|
+
message = "Arrow::Table save source must be one of ["
|
49
|
+
message << available_schemes.join(", ")
|
50
|
+
message << "]: #{@output.scheme.inspect}"
|
51
|
+
raise ArgumentError, message
|
52
|
+
end
|
53
|
+
__send__(custom_save_method)
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
def save_to_file
|
35
58
|
format = @options[:format]
|
36
59
|
custom_save_method = "save_as_#{format}"
|
37
60
|
unless respond_to?(custom_save_method, true)
|
@@ -57,21 +80,24 @@ module Arrow
|
|
57
80
|
end
|
58
81
|
end
|
59
82
|
|
60
|
-
private
|
61
83
|
def fill_options
|
62
84
|
if @options[:format] and @options.key?(:compression)
|
63
85
|
return
|
64
86
|
end
|
65
87
|
|
66
|
-
|
88
|
+
case @output
|
89
|
+
when Buffer
|
67
90
|
info = {}
|
91
|
+
when URI
|
92
|
+
extension = PathExtension.new(@output.path)
|
93
|
+
info = extension.extract
|
68
94
|
else
|
69
95
|
extension = PathExtension.new(@output)
|
70
96
|
info = extension.extract
|
71
97
|
end
|
72
98
|
format = info[:format]
|
73
99
|
@options = @options.dup
|
74
|
-
if format
|
100
|
+
if format
|
75
101
|
@options[:format] ||= format.to_sym
|
76
102
|
else
|
77
103
|
@options[:format] ||= :arrow
|
@@ -125,14 +151,19 @@ module Arrow
|
|
125
151
|
save_as_arrow_file
|
126
152
|
end
|
127
153
|
|
154
|
+
# @since 7.0.0
|
155
|
+
def save_as_arrows
|
156
|
+
save_raw(RecordBatchStreamWriter)
|
157
|
+
end
|
158
|
+
|
128
159
|
# @since 1.0.0
|
129
160
|
def save_as_arrow_streaming
|
130
|
-
|
161
|
+
save_as_arrows
|
131
162
|
end
|
132
163
|
|
133
164
|
# @deprecated Use `format: :arrow_streaming` instead.
|
134
165
|
def save_as_stream
|
135
|
-
|
166
|
+
save_as_arrows
|
136
167
|
end
|
137
168
|
|
138
169
|
def csv_save(**options)
|
@@ -21,51 +21,27 @@ module Arrow
|
|
21
21
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
22
22
|
class TableTableFormatter < TableFormatter
|
23
23
|
private
|
24
|
-
def format_header(text,
|
25
|
-
|
24
|
+
def format_header(text, column_formatters)
|
25
|
+
column_formatters.each do |column_formatter|
|
26
26
|
text << "\t"
|
27
|
-
text <<
|
27
|
+
text << column_formatter.aligned_name
|
28
28
|
end
|
29
29
|
text << "\n"
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
def format_column_name(column)
|
34
|
-
case column.data_type
|
35
|
-
when TimestampDataType
|
36
|
-
"%*s" % [::Time.now.iso8601.size, column.name]
|
37
|
-
when FloatDataType, DoubleDataType
|
38
|
-
"%*s" % [FLOAT_N_DIGITS, column.name]
|
39
|
-
else
|
40
|
-
column.name
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def format_rows(text, columns, rows, n_digits, start_offset)
|
32
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
45
33
|
rows.each_with_index do |row, nth_row|
|
46
34
|
text << ("%*d" % [n_digits, start_offset + nth_row])
|
47
35
|
row.each_with_index do |column_value, nth_column|
|
48
36
|
text << "\t"
|
49
|
-
|
50
|
-
|
37
|
+
column_formatter = column_formatters[nth_column]
|
38
|
+
aligned_name = column_formatter.aligned_name
|
39
|
+
text << column_formatter.format_value(column_value, aligned_name.size)
|
51
40
|
end
|
52
41
|
text << "\n"
|
53
42
|
end
|
54
43
|
end
|
55
44
|
|
56
|
-
def format_column_value(column, value)
|
57
|
-
case value
|
58
|
-
when ::Time
|
59
|
-
value.iso8601
|
60
|
-
when Float
|
61
|
-
"%*f" % [[column.name.size, FLOAT_N_DIGITS].max, value]
|
62
|
-
when Integer
|
63
|
-
"%*d" % [column.name.size, value]
|
64
|
-
else
|
65
|
-
"%-*s" % [column.name.size, value.to_s]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
45
|
def format_ellipsis(text)
|
70
46
|
text << "...\n"
|
71
47
|
end
|