red-arrow 4.0.1 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +10 -0
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +12 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/datum.rb +102 -0
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/function.rb +52 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +58 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/s3-global-options.rb +38 -0
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +61 -55
- data/lib/arrow/sort-options.rb +8 -8
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +119 -44
- data/lib/arrow/table-saver.rb +36 -5
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +112 -40
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -9
- data/test/helper.rb +3 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +34 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +210 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +376 -56
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +117 -168
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class TableConcatenateOptions
    class << self
      # Builds a TableConcatenateOptions from a Hash of option names to
      # option values by calling the matching "#{name}=" writer for
      # each entry.
      #
      # @param value [Object] The candidate to convert.
      # @return [TableConcatenateOptions, nil] A new options object when
      #   +value+ is a Hash; +nil+ for anything else.
      #
      # @api private
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |k, v|
            # Assign this entry's own value, not the whole Hash.
            options.public_send("#{k}=", v)
          end
          options
        else
          nil
        end
      end
    end
  end
end
|
@@ -18,6 +18,125 @@
|
|
18
18
|
module Arrow
|
19
19
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
20
20
|
class TableFormatter
|
21
|
+
# @private
|
22
|
+
class ColumnFormatter
  attr_reader :column
  attr_reader :head_values
  attr_reader :tail_values
  attr_reader :sample_values

  # @param column [#name, #data_type] The column to be formatted.
  # @param head_values [::Array] The leading values that will be shown.
  # @param tail_values [::Array] The trailing values that will be shown.
  def initialize(column, head_values, tail_values)
    @column = column
    @head_values = head_values
    @tail_values = tail_values
    # Widths are computed only from the values that are displayed.
    @sample_values = head_values + tail_values
    # Cache of struct field => display width.
    @field_value_widths = {}
  end

  # @return The data type of the column (memoized).
  def data_type
    @data_type ||= @column.data_type
  end

  # @return The name of the column (memoized).
  def name
    @name ||= @column.name
  end

  # @return [String] The column name padded to the display width
  #   computed from the data type and the sample values (memoized).
  def aligned_name
    @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
  end

  FLOAT_N_DIGITS = 10
  FORMATTED_NULL = "(null)"

  # Formats one value as text.
  #
  # @param value [Object] The value to format.
  # @param width [Integer] The minimum width of the result. It is
  #   ignored for ::Time values, which are always ISO 8601 text.
  # @return [String] The formatted value. Numbers and nulls are right
  #   aligned; hashes and other values are left aligned.
  def format_value(value, width=0)
    case value
    when ::Time
      value.iso8601
    when Float
      "%*f" % [[width, FLOAT_N_DIGITS].max, value]
    when Integer
      "%*d" % [width, value]
    when Hash
      # Each field is rendered as "name: value" with the value padded
      # to the width computed from the sample values of that field.
      formatted_values = data_type.fields.collect do |field|
        field_name = field.name
        field_value_width = compute_field_value_width(field, @sample_values)
        formatted_name = format_value(field_name, 0)
        formatted_value = format_value(value[field_name], field_value_width)
        "#{formatted_name}: #{formatted_value}"
      end
      "%-*s" % [width, "{#{formatted_values.join(", ")}}"]
    when nil
      "%*s" % [width, FORMATTED_NULL]
    else
      "%-*s" % [width, value.to_s]
    end
  end

  private
  # Computes (and caches per field) the display width of the values
  # of a struct field.
  def compute_field_value_width(field, sample_values)
    unless @field_value_widths.key?(field)
      field_name = field.name
      field_sample_values = sample_values.collect do |v|
        # A null struct contributes a null value for every field.
        (v || {})[field_name]
      end
      # Format an empty name so that the result size is the value width.
      field_aligned_name = format_aligned_name("",
                                               field.data_type,
                                               field_sample_values)
      @field_value_widths[field] = field_aligned_name.size
    end
    @field_value_widths[field]
  end

  # Right aligns +name+ to the width needed to show +sample_values+
  # of +data_type+. Names of other data types are returned as is.
  def format_aligned_name(name, data_type, sample_values)
    case data_type
    when TimestampDataType
      "%*s" % [::Time.now.iso8601.size, name]
    when IntegerDataType
      present_values = sample_values.compact
      have_null = present_values.size != sample_values.size
      have_negative = present_values.any?(&:negative?)
      max_value = present_values.collect(&:abs).max
      if max_value.nil?
        width = 0
      else
        # Count decimal digits exactly. Math.log10 converts to Float
        # first and over-counts for large values such as 10**17 - 1.
        width = max_value.to_s.size
      end
      width += 1 if have_negative # Need "-"
      width = [width, FORMATTED_NULL.size].max if have_null
      "%*s" % [width, name]
    when FloatDataType, DoubleDataType
      "%*s" % [FLOAT_N_DIGITS, name]
    when StructDataType
      field_widths = data_type.fields.collect do |field|
        field_value_width = compute_field_value_width(field, sample_values)
        field.name.size + ": ".size + field_value_width
      end
      width = "{}".size + field_widths.sum
      if field_widths.size > 0
        width += (", ".size * (field_widths.size - 1))
      end
      "%*s" % [width, name]
    else
      name
    end
  end
end
|
139
|
+
|
21
140
|
def initialize(table, options={})
|
22
141
|
@table = table
|
23
142
|
@options = options
|
@@ -25,38 +144,43 @@ module Arrow
|
|
25
144
|
|
26
145
|
def format
|
27
146
|
text = ""
|
28
|
-
columns = @table.columns
|
29
|
-
format_header(text, columns)
|
30
|
-
|
31
147
|
n_rows = @table.n_rows
|
32
|
-
return text if n_rows.zero?
|
33
|
-
|
34
148
|
border = @options[:border] || 10
|
35
|
-
|
149
|
+
|
36
150
|
head_limit = [border, n_rows].min
|
37
|
-
|
38
|
-
|
151
|
+
|
152
|
+
tail_start = [border, n_rows - border].max
|
153
|
+
tail_limit = n_rows - tail_start
|
154
|
+
|
155
|
+
column_formatters = @table.columns.collect do |column|
|
156
|
+
head_values = column.each.take(head_limit)
|
157
|
+
if tail_limit > 0
|
158
|
+
tail_values = column.reverse_each.take(tail_limit).reverse
|
159
|
+
else
|
160
|
+
tail_values = []
|
161
|
+
end
|
162
|
+
ColumnFormatter.new(column, head_values, tail_values)
|
39
163
|
end
|
164
|
+
|
165
|
+
format_header(text, column_formatters)
|
166
|
+
return text if n_rows.zero?
|
167
|
+
|
168
|
+
n_digits = (Math.log10(n_rows) + 1).truncate
|
40
169
|
format_rows(text,
|
41
|
-
|
42
|
-
|
170
|
+
column_formatters,
|
171
|
+
column_formatters.collect(&:head_values).transpose,
|
43
172
|
n_digits,
|
44
173
|
0)
|
45
174
|
return text if n_rows <= border
|
46
175
|
|
47
|
-
tail_start = [border, n_rows - border].max
|
48
|
-
tail_limit = n_rows - tail_start
|
49
|
-
tail_column_values = columns.collect do |column|
|
50
|
-
column.reverse_each.take(tail_limit).reverse
|
51
|
-
end
|
52
176
|
|
53
177
|
if head_limit != tail_start
|
54
178
|
format_ellipsis(text)
|
55
179
|
end
|
56
180
|
|
57
181
|
format_rows(text,
|
58
|
-
|
59
|
-
|
182
|
+
column_formatters,
|
183
|
+
column_formatters.collect(&:tail_values).transpose,
|
60
184
|
n_digits,
|
61
185
|
tail_start)
|
62
186
|
|
@@ -22,12 +22,14 @@ module Arrow
|
|
22
22
|
def format_header(text, columns)
|
23
23
|
end
|
24
24
|
|
25
|
-
def format_rows(text,
|
25
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
26
26
|
rows.each_with_index do |row, nth_row|
|
27
27
|
text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
|
28
28
|
row.each_with_index do |column_value, nth_column|
|
29
|
-
|
30
|
-
|
29
|
+
column_formatter = column_formatters[nth_column]
|
30
|
+
formatted_name = column_formatter.name
|
31
|
+
formatted_value = column_formatter.format_value(column_value)
|
32
|
+
text << "#{formatted_name}: #{formatted_value}\n"
|
31
33
|
end
|
32
34
|
end
|
33
35
|
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -15,6 +15,8 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
+
require "open-uri"
|
19
|
+
|
18
20
|
module Arrow
|
19
21
|
class TableLoader
|
20
22
|
class << self
|
@@ -31,6 +33,48 @@ module Arrow
|
|
31
33
|
end
|
32
34
|
|
33
35
|
def load
  # Pick the loader method names to try, most specific first.
  candidates =
    if @input.is_a?(URI)
      scheme = @input.scheme
      scheme_specific = scheme ? ["load_from_uri_#{scheme}"] : []
      scheme_specific + ["load_from_uri"]
    elsif @input.is_a?(String) && ::File.directory?(@input)
      ["load_from_directory"]
    else
      ["load_from_file"]
    end

  # Dispatch to the first candidate that is defined (private included).
  loader = candidates.find do |candidate|
    respond_to?(candidate, true)
  end
  return __send__(loader) if loader

  # No loader is available: report every "load_from_*" suffix we have.
  prefix = "load_from_"
  method_names = (methods(true) | private_methods(true)).collect(&:to_s)
  loader_names = method_names.select do |method_name|
    method_name.start_with?(prefix)
  end
  available_schemes = loader_names.collect do |method_name|
    method_name[prefix.size..-1]
  end
  raise ArgumentError,
        "Arrow::Table load source must be one of [" +
        available_schemes.join(", ") +
        "]: #{@input.inspect}"
end
|
63
|
+
|
64
|
+
private
|
65
|
+
def load_from_uri_http
|
66
|
+
load_by_reader
|
67
|
+
end
|
68
|
+
|
69
|
+
def load_from_uri_https
|
70
|
+
load_by_reader
|
71
|
+
end
|
72
|
+
|
73
|
+
def load_from_file
|
74
|
+
load_by_reader
|
75
|
+
end
|
76
|
+
|
77
|
+
def load_by_reader
|
34
78
|
format = @options[:format]
|
35
79
|
custom_load_method = "load_as_#{format}"
|
36
80
|
unless respond_to?(custom_load_method, true)
|
@@ -56,21 +100,24 @@ module Arrow
|
|
56
100
|
end
|
57
101
|
end
|
58
102
|
|
59
|
-
private
|
60
103
|
def fill_options
|
61
104
|
if @options[:format] and @options.key?(:compression)
|
62
105
|
return
|
63
106
|
end
|
64
107
|
|
65
|
-
|
108
|
+
case @input
|
109
|
+
when Buffer
|
66
110
|
info = {}
|
111
|
+
when URI
|
112
|
+
extension = PathExtension.new(@input.path)
|
113
|
+
info = extension.extract
|
67
114
|
else
|
68
115
|
extension = PathExtension.new(@input)
|
69
116
|
info = extension.extract
|
70
117
|
end
|
71
118
|
format = info[:format]
|
72
119
|
@options = @options.dup
|
73
|
-
if format
|
120
|
+
if format
|
74
121
|
@options[:format] ||= format.to_sym
|
75
122
|
else
|
76
123
|
@options[:format] ||= :arrow
|
@@ -81,10 +128,29 @@ module Arrow
|
|
81
128
|
end
|
82
129
|
|
83
130
|
def open_input_stream
|
84
|
-
|
85
|
-
|
131
|
+
case @input
|
132
|
+
when Buffer
|
133
|
+
yield(BufferInputStream.new(@input))
|
134
|
+
when URI
|
135
|
+
@input.open do |ruby_input|
|
136
|
+
case @options[:format]
|
137
|
+
when :stream, :arrow_streaming
|
138
|
+
Gio::RubyInputStream.open(ruby_input) do |gio_input|
|
139
|
+
GIOInputStream.open(gio_input) do |input|
|
140
|
+
yield(input)
|
141
|
+
end
|
142
|
+
end
|
143
|
+
else
|
144
|
+
# TODO: We need to consider Ruby's GVL carefully to use
|
145
|
+
# Ruby object directly for input with other formats. We
|
146
|
+
# read data and use it as Buffer for now.
|
147
|
+
data = GLib::Bytes.new(ruby_input.read.freeze)
|
148
|
+
buffer = Buffer.new(data)
|
149
|
+
yield(BufferInputStream.new(buffer))
|
150
|
+
end
|
151
|
+
end
|
86
152
|
else
|
87
|
-
MemoryMappedInputStream.new(@input)
|
153
|
+
yield(MemoryMappedInputStream.new(@input))
|
88
154
|
end
|
89
155
|
end
|
90
156
|
|
@@ -100,32 +166,19 @@ module Arrow
|
|
100
166
|
end
|
101
167
|
|
102
168
|
def load_as_arrow
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
RecordBatchFileReader,
|
108
|
-
RecordBatchStreamReader,
|
109
|
-
]
|
110
|
-
reader_class_candidates.each do |reader_class_candidate|
|
111
|
-
input = open_input_stream
|
112
|
-
begin
|
113
|
-
reader = reader_class_candidate.new(input)
|
114
|
-
rescue Arrow::Error
|
115
|
-
error = $!
|
116
|
-
else
|
117
|
-
break
|
118
|
-
end
|
169
|
+
begin
|
170
|
+
load_as_arrow_file
|
171
|
+
rescue
|
172
|
+
load_as_arrows
|
119
173
|
end
|
120
|
-
raise error if reader.nil?
|
121
|
-
load_raw(input, reader)
|
122
174
|
end
|
123
175
|
|
124
176
|
# @since 1.0.0
|
125
177
|
def load_as_arrow_file
|
126
|
-
|
127
|
-
|
128
|
-
|
178
|
+
open_input_stream do |input|
|
179
|
+
reader = RecordBatchFileReader.new(input)
|
180
|
+
load_raw(input, reader)
|
181
|
+
end
|
129
182
|
end
|
130
183
|
|
131
184
|
# @deprecated Use `format: :arrow_file` instead.
|
@@ -133,34 +186,46 @@ module Arrow
|
|
133
186
|
load_as_arrow_file
|
134
187
|
end
|
135
188
|
|
189
|
+
# @since 7.0.0
|
190
|
+
def load_as_arrows
|
191
|
+
open_input_stream do |input|
|
192
|
+
reader = RecordBatchStreamReader.new(input)
|
193
|
+
load_raw(input, reader)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
136
197
|
# @since 1.0.0
|
137
198
|
def load_as_arrow_streaming
|
138
|
-
|
139
|
-
reader = RecordBatchStreamReader.new(input)
|
140
|
-
load_raw(input, reader)
|
199
|
+
load_as_arrows
|
141
200
|
end
|
142
201
|
|
143
202
|
# @deprecated Use `format: :arrow_streaming` instead.
|
144
203
|
def load_as_stream
|
145
|
-
|
204
|
+
load_as_arrows
|
146
205
|
end
|
147
206
|
|
148
207
|
if Arrow.const_defined?(:ORCFileReader)
|
149
208
|
def load_as_orc
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
209
|
+
open_input_stream do |input|
|
210
|
+
reader = ORCFileReader.new(input)
|
211
|
+
field_indexes = @options[:field_indexes]
|
212
|
+
reader.set_field_indexes(field_indexes) if field_indexes
|
213
|
+
table = reader.read_stripes
|
214
|
+
table.instance_variable_set(:@input, input)
|
215
|
+
table
|
216
|
+
end
|
157
217
|
end
|
158
218
|
end
|
159
219
|
|
160
220
|
def csv_load(options)
|
161
221
|
options.delete(:format)
|
162
|
-
|
222
|
+
case @input
|
223
|
+
when Buffer
|
163
224
|
CSVLoader.load(@input.data.to_s, **options)
|
225
|
+
when URI
|
226
|
+
@input.open do |input|
|
227
|
+
CSVLoader.load(input.read, **options)
|
228
|
+
end
|
164
229
|
else
|
165
230
|
CSVLoader.load(Pathname.new(@input), **options)
|
166
231
|
end
|
@@ -177,11 +242,21 @@ module Arrow
|
|
177
242
|
end
|
178
243
|
|
179
244
|
def load_as_feather
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
245
|
+
open_input_stream do |input|
|
246
|
+
reader = FeatherFileReader.new(input)
|
247
|
+
table = reader.read
|
248
|
+
table.instance_variable_set(:@input, input)
|
249
|
+
table
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
def load_as_json
|
254
|
+
open_input_stream do |input|
|
255
|
+
reader = JSONReader.new(input)
|
256
|
+
table = reader.read
|
257
|
+
table.instance_variable_set(:@input, input)
|
258
|
+
table
|
259
|
+
end
|
185
260
|
end
|
186
261
|
end
|
187
262
|
end
|
data/lib/arrow/table-saver.rb
CHANGED
@@ -32,6 +32,29 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def save
  # URI outputs and everything else are handled by different savers.
  custom_save_method = @output.is_a?(URI) ? "save_to_uri" : "save_to_file"
  if respond_to?(custom_save_method, true)
    return __send__(custom_save_method)
  end

  # The saver isn't defined: report every "save_to_*" suffix we have.
  prefix = "save_to_"
  method_names = (methods(true) | private_methods(true)).collect(&:to_s)
  saver_names = method_names.select do |method_name|
    method_name.start_with?(prefix)
  end
  available_schemes = saver_names.collect do |method_name|
    method_name[prefix.size..-1]
  end
  raise ArgumentError,
        "Arrow::Table save source must be one of [" +
        available_schemes.join(", ") +
        "]: #{@output.scheme.inspect}"
end
|
55
|
+
|
56
|
+
private
|
57
|
+
def save_to_file
|
35
58
|
format = @options[:format]
|
36
59
|
custom_save_method = "save_as_#{format}"
|
37
60
|
unless respond_to?(custom_save_method, true)
|
@@ -57,21 +80,24 @@ module Arrow
|
|
57
80
|
end
|
58
81
|
end
|
59
82
|
|
60
|
-
private
|
61
83
|
def fill_options
|
62
84
|
if @options[:format] and @options.key?(:compression)
|
63
85
|
return
|
64
86
|
end
|
65
87
|
|
66
|
-
|
88
|
+
case @output
|
89
|
+
when Buffer
|
67
90
|
info = {}
|
91
|
+
when URI
|
92
|
+
extension = PathExtension.new(@output.path)
|
93
|
+
info = extension.extract
|
68
94
|
else
|
69
95
|
extension = PathExtension.new(@output)
|
70
96
|
info = extension.extract
|
71
97
|
end
|
72
98
|
format = info[:format]
|
73
99
|
@options = @options.dup
|
74
|
-
if format
|
100
|
+
if format
|
75
101
|
@options[:format] ||= format.to_sym
|
76
102
|
else
|
77
103
|
@options[:format] ||= :arrow
|
@@ -125,14 +151,19 @@ module Arrow
|
|
125
151
|
save_as_arrow_file
|
126
152
|
end
|
127
153
|
|
154
|
+
# @since 7.0.0
|
155
|
+
def save_as_arrows
|
156
|
+
save_raw(RecordBatchStreamWriter)
|
157
|
+
end
|
158
|
+
|
128
159
|
# @since 1.0.0
|
129
160
|
def save_as_arrow_streaming
|
130
|
-
|
161
|
+
save_as_arrows
|
131
162
|
end
|
132
163
|
|
133
164
|
# @deprecated Use `format: :arrow_streaming` instead.
|
134
165
|
def save_as_stream
|
135
|
-
|
166
|
+
save_as_arrows
|
136
167
|
end
|
137
168
|
|
138
169
|
def csv_save(**options)
|
@@ -21,51 +21,27 @@ module Arrow
|
|
21
21
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
22
22
|
class TableTableFormatter < TableFormatter
|
23
23
|
private
|
24
|
-
def format_header(text,
|
25
|
-
|
24
|
+
def format_header(text, column_formatters)
|
25
|
+
column_formatters.each do |column_formatter|
|
26
26
|
text << "\t"
|
27
|
-
text <<
|
27
|
+
text << column_formatter.aligned_name
|
28
28
|
end
|
29
29
|
text << "\n"
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
def format_column_name(column)
|
34
|
-
case column.data_type
|
35
|
-
when TimestampDataType
|
36
|
-
"%*s" % [::Time.now.iso8601.size, column.name]
|
37
|
-
when FloatDataType, DoubleDataType
|
38
|
-
"%*s" % [FLOAT_N_DIGITS, column.name]
|
39
|
-
else
|
40
|
-
column.name
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def format_rows(text, columns, rows, n_digits, start_offset)
|
32
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
45
33
|
rows.each_with_index do |row, nth_row|
|
46
34
|
text << ("%*d" % [n_digits, start_offset + nth_row])
|
47
35
|
row.each_with_index do |column_value, nth_column|
|
48
36
|
text << "\t"
|
49
|
-
|
50
|
-
|
37
|
+
column_formatter = column_formatters[nth_column]
|
38
|
+
aligned_name = column_formatter.aligned_name
|
39
|
+
text << column_formatter.format_value(column_value, aligned_name.size)
|
51
40
|
end
|
52
41
|
text << "\n"
|
53
42
|
end
|
54
43
|
end
|
55
44
|
|
56
|
-
def format_column_value(column, value)
|
57
|
-
case value
|
58
|
-
when ::Time
|
59
|
-
value.iso8601
|
60
|
-
when Float
|
61
|
-
"%*f" % [[column.name.size, FLOAT_N_DIGITS].max, value]
|
62
|
-
when Integer
|
63
|
-
"%*d" % [column.name.size, value]
|
64
|
-
else
|
65
|
-
"%-*s" % [column.name.size, value.to_s]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
45
|
def format_ellipsis(text)
|
70
46
|
text << "...\n"
|
71
47
|
end
|