red-arrow 3.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +130 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/datum.rb +100 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +46 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +34 -40
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +154 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-orc.rb +19 -23
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +73 -6
@@ -0,0 +1,32 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class SourceNodeOptions
    # Builds source node options from a value that can act as a source.
    #
    # @api private
    #
    # @param value [Object] The candidate to convert.
    # @return [Arrow::SourceNodeOptions, nil] New options wrapping `value`
    #   when it is a record batch reader, a record batch or a table;
    #   `nil` for anything else.
    def self.try_convert(value)
      # A `case` without `else` evaluates to nil when nothing matches.
      case value
      when RecordBatchReader, RecordBatch, Table then new(value)
      end
    end
  end
end
|
@@ -33,7 +33,7 @@ module Arrow
|
|
33
33
|
# @param type_codes [::Array<Integer>] The IDs that indicate
|
34
34
|
# corresponding fields.
|
35
35
|
#
|
36
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
36
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
37
37
|
# fields = [
|
38
38
|
# Arrow::Field.new("visible", :boolean),
|
39
39
|
# {
|
@@ -57,7 +57,7 @@ module Arrow
|
|
57
57
|
# @option description [::Array<Integer>] :type_codes The IDs
|
58
58
|
# that indicate corresponding fields.
|
59
59
|
#
|
60
|
-
# @example Create a sparse union data type for {2: visible, 9: count}
|
60
|
+
# @example Create a sparse union data type for `{2: visible, 9: count}`
|
61
61
|
# fields = [
|
62
62
|
# Arrow::Field.new("visible", :boolean),
|
63
63
|
# {
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class StringDictionaryArrayBuilder
    # Supplies #append_values, which relies on the private hook below.
    include SymbolValuesAppendable

    # Creates the builder used to collect values before they are appended
    # to this dictionary builder.
    #
    # @return [Arrow::StringArrayBuilder] A fresh string array builder.
    private def create_values_array_builder
      StringArrayBuilder.new
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  module SymbolValuesAppendable
    # Appends values, converting Symbols to Strings first.
    #
    # The including class must provide `create_values_array_builder` and
    # `append_array`.
    #
    # @param values [Enumerable] The values to append. Symbols are
    #   replaced by their String form; all other values pass through as is.
    # @param is_valids [Object, nil] Passed unchanged to the values
    #   builder's `append_values`.
    # @return [Object] Whatever `append_array` returns.
    def append_values(values, is_valids=nil)
      values_builder = create_values_array_builder
      stringified_values = values.map do |value|
        value.is_a?(Symbol) ? value.to_s : value
      end
      values_builder.append_values(stringified_values, is_valids)
      append_array(values_builder.finish)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class TableConcatenateOptions
    class << self
      # Builds concatenate options from a Hash of option names and values.
      #
      # @api private
      #
      # @param value [Object] The candidate to convert.
      # @return [Arrow::TableConcatenateOptions, nil] New options with one
      #   `#{name}=` setter called per Hash entry when `value` is a Hash;
      #   `nil` for anything else.
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |name, option_value|
            # Pass the entry's own value, not the whole Hash.
            options.public_send("#{name}=", option_value)
          end
          options
        else
          nil
        end
      end
    end
  end
end
|
@@ -18,6 +18,125 @@
|
|
18
18
|
module Arrow
|
19
19
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
20
20
|
class TableFormatter
|
21
|
+
# @private
|
22
|
+
class ColumnFormatter
  # Width of float columns and minimum width of a formatted float.
  FLOAT_N_DIGITS = 10
  # Text shown in place of a nil value.
  FORMATTED_NULL = "(null)"

  attr_reader :column
  attr_reader :head_values
  attr_reader :tail_values
  attr_reader :sample_values

  # @param column [Object] The column to format. It must respond to
  #   `data_type` and `name`.
  # @param head_values [::Array] The first values of the column.
  # @param tail_values [::Array] The last values of the column.
  def initialize(column, head_values, tail_values)
    @column = column
    @head_values = head_values
    @tail_values = tail_values
    # Widths are computed only from the values that will be shown.
    @sample_values = head_values + tail_values
    # Cache of per-field value widths, keyed by field.
    @field_value_widths = {}
  end

  # @return [Object] The data type of the column (memoized).
  def data_type
    @data_type ||= @column.data_type
  end

  # @return [String] The name of the column (memoized).
  def name
    @name ||= @column.name
  end

  # @return [String] The column name padded on the left to the width
  #   computed from the data type and the sample values (memoized).
  def aligned_name
    @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
  end

  # Formats one value of the column.
  #
  # @param value [Object, nil] The value to format.
  # @param width [Integer] The minimum width of the result. Times ignore
  #   it; floats use at least `FLOAT_N_DIGITS`.
  # @return [String] The formatted value.
  def format_value(value, width=0)
    case value
    when ::Time
      value.iso8601
    when Float
      "%*f" % [[width, FLOAT_N_DIGITS].max, value]
    when Integer
      "%*d" % [width, value]
    when Hash
      # The field list always comes from this column's data type.
      formatted_values = data_type.fields.collect do |field|
        field_name = field.name
        field_value_width = compute_field_value_width(field, @sample_values)
        formatted_name = format_value(field_name, 0)
        formatted_value = format_value(value[field_name], field_value_width)
        "#{formatted_name}: #{formatted_value}"
      end
      # Built by interpolation so no string literal is mutated.
      formatted = "{#{formatted_values.join(", ")}}"
      "%-*s" % [width, formatted]
    when nil
      "%*s" % [width, FORMATTED_NULL]
    else
      "%-*s" % [width, value.to_s]
    end
  end

  private
  # Computes, and caches per field, the width needed by the values of
  # one field across the given sample values.
  def compute_field_value_width(field, sample_values)
    unless @field_value_widths.key?(field)
      field_name = field.name
      # A nil sample contributes a nil field value.
      field_sample_values = sample_values.collect do |v|
        (v || {})[field_name]
      end
      # Aligning an empty name yields a string exactly as wide as the values.
      field_aligned_name = format_aligned_name("",
                                               field.data_type,
                                               field_sample_values)
      @field_value_widths[field] = field_aligned_name.size
    end
    @field_value_widths[field]
  end

  # Right-aligns `name` to the width needed to show `sample_values` of
  # the given data type. Data types not handled below return `name` as is.
  def format_aligned_name(name, data_type, sample_values)
    case data_type
    when TimestampDataType
      "%*s" % [::Time.now.iso8601.size, name]
    when IntegerDataType
      present_values = sample_values.compact
      have_null = present_values.size != sample_values.size
      have_negative = present_values.any?(&:negative?)
      max_value = present_values.collect(&:abs).max
      if max_value.nil?
        width = 0
      else
        # Count decimal digits exactly. Math.log10 converts to Float first,
        # so integers above 2**53 that round up to a power of ten were
        # reported one digit too wide.
        width = max_value.to_s.size
      end
      width += 1 if have_negative # Need "-"
      width = [width, FORMATTED_NULL.size].max if have_null
      "%*s" % [width, name]
    when FloatDataType, DoubleDataType
      "%*s" % [FLOAT_N_DIGITS, name]
    when StructDataType
      field_widths = data_type.fields.collect do |field|
        field_value_width = compute_field_value_width(field, sample_values)
        field.name.size + ": ".size + field_value_width
      end
      width = "{}".size + field_widths.sum
      if field_widths.size > 0
        width += (", ".size * (field_widths.size - 1))
      end
      "%*s" % [width, name]
    else
      name
    end
  end
end
|
139
|
+
|
21
140
|
def initialize(table, options={})
|
22
141
|
@table = table
|
23
142
|
@options = options
|
@@ -25,38 +144,43 @@ module Arrow
|
|
25
144
|
|
26
145
|
def format
|
27
146
|
text = ""
|
28
|
-
columns = @table.columns
|
29
|
-
format_header(text, columns)
|
30
|
-
|
31
147
|
n_rows = @table.n_rows
|
32
|
-
return text if n_rows.zero?
|
33
|
-
|
34
148
|
border = @options[:border] || 10
|
35
|
-
|
149
|
+
|
36
150
|
head_limit = [border, n_rows].min
|
37
|
-
|
38
|
-
|
151
|
+
|
152
|
+
tail_start = [border, n_rows - border].max
|
153
|
+
tail_limit = n_rows - tail_start
|
154
|
+
|
155
|
+
column_formatters = @table.columns.collect do |column|
|
156
|
+
head_values = column.each.take(head_limit)
|
157
|
+
if tail_limit > 0
|
158
|
+
tail_values = column.reverse_each.take(tail_limit).reverse
|
159
|
+
else
|
160
|
+
tail_values = []
|
161
|
+
end
|
162
|
+
ColumnFormatter.new(column, head_values, tail_values)
|
39
163
|
end
|
164
|
+
|
165
|
+
format_header(text, column_formatters)
|
166
|
+
return text if n_rows.zero?
|
167
|
+
|
168
|
+
n_digits = (Math.log10(n_rows) + 1).truncate
|
40
169
|
format_rows(text,
|
41
|
-
|
42
|
-
|
170
|
+
column_formatters,
|
171
|
+
column_formatters.collect(&:head_values).transpose,
|
43
172
|
n_digits,
|
44
173
|
0)
|
45
174
|
return text if n_rows <= border
|
46
175
|
|
47
|
-
tail_start = [border, n_rows - border].max
|
48
|
-
tail_limit = n_rows - tail_start
|
49
|
-
tail_column_values = columns.collect do |column|
|
50
|
-
column.reverse_each.take(tail_limit).reverse
|
51
|
-
end
|
52
176
|
|
53
177
|
if head_limit != tail_start
|
54
178
|
format_ellipsis(text)
|
55
179
|
end
|
56
180
|
|
57
181
|
format_rows(text,
|
58
|
-
|
59
|
-
|
182
|
+
column_formatters,
|
183
|
+
column_formatters.collect(&:tail_values).transpose,
|
60
184
|
n_digits,
|
61
185
|
tail_start)
|
62
186
|
|
@@ -22,12 +22,14 @@ module Arrow
|
|
22
22
|
def format_header(text, columns)
|
23
23
|
end
|
24
24
|
|
25
|
-
def format_rows(text,
|
25
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
26
26
|
rows.each_with_index do |row, nth_row|
|
27
27
|
text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
|
28
28
|
row.each_with_index do |column_value, nth_column|
|
29
|
-
|
30
|
-
|
29
|
+
column_formatter = column_formatters[nth_column]
|
30
|
+
formatted_name = column_formatter.name
|
31
|
+
formatted_value = column_formatter.format_value(column_value)
|
32
|
+
text << "#{formatted_name}: #{formatted_value}\n"
|
31
33
|
end
|
32
34
|
end
|
33
35
|
end
|
data/lib/arrow/table-loader.rb
CHANGED
@@ -15,6 +15,8 @@
|
|
15
15
|
# specific language governing permissions and limitations
|
16
16
|
# under the License.
|
17
17
|
|
18
|
+
require "uri"
|
19
|
+
|
18
20
|
module Arrow
|
19
21
|
class TableLoader
|
20
22
|
class << self
|
@@ -31,6 +33,31 @@ module Arrow
|
|
31
33
|
end
|
32
34
|
|
33
35
|
# Loads the table by dispatching on the kind of @input: "load_from_uri"
# for a URI, "load_from_directory" for a String naming an existing
# directory, and "load_from_file" for everything else.
#
# @return [Object] Whatever the selected "load_from_*" method returns.
# @raise [ArgumentError] If the selected "load_from_*" method is not
#   defined on this object. The message lists the available ones.
def load
  custom_load_method =
    if @input.is_a?(URI)
      "load_from_uri"
    elsif @input.is_a?(String) and ::File.directory?(@input)
      "load_from_directory"
    else
      "load_from_file"
    end
  # Private methods count as available too.
  return __send__(custom_load_method) if respond_to?(custom_load_method, true)

  prefix = /\Aload_from_/
  candidate_names = (methods(true) | private_methods(true)).collect(&:to_s)
  available_schemes = candidate_names.select do |candidate_name|
    candidate_name =~ prefix
  end
  available_schemes = available_schemes.collect do |candidate_name|
    candidate_name.sub(prefix, "")
  end
  raise ArgumentError,
        "Arrow::Table load source must be one of [" +
        "#{available_schemes.join(", ")}]: #{@input.inspect}"
end
|
58
|
+
|
59
|
+
private
|
60
|
+
def load_from_file
|
34
61
|
format = @options[:format]
|
35
62
|
custom_load_method = "load_as_#{format}"
|
36
63
|
unless respond_to?(custom_load_method, true)
|
@@ -56,21 +83,24 @@ module Arrow
|
|
56
83
|
end
|
57
84
|
end
|
58
85
|
|
59
|
-
private
|
60
86
|
def fill_options
|
61
87
|
if @options[:format] and @options.key?(:compression)
|
62
88
|
return
|
63
89
|
end
|
64
90
|
|
65
|
-
|
91
|
+
case @input
|
92
|
+
when Buffer
|
66
93
|
info = {}
|
94
|
+
when URI
|
95
|
+
extension = PathExtension.new(@input.path)
|
96
|
+
info = extension.extract
|
67
97
|
else
|
68
98
|
extension = PathExtension.new(@input)
|
69
99
|
info = extension.extract
|
70
100
|
end
|
71
101
|
format = info[:format]
|
72
102
|
@options = @options.dup
|
73
|
-
if format
|
103
|
+
if format
|
74
104
|
@options[:format] ||= format.to_sym
|
75
105
|
else
|
76
106
|
@options[:format] ||= :arrow
|
@@ -183,5 +213,13 @@ module Arrow
|
|
183
213
|
table.instance_variable_set(:@input, input)
|
184
214
|
table
|
185
215
|
end
|
216
|
+
|
217
|
+
# Loads the table by reading the input stream with a JSON reader.
#
# @return [Object] The table returned by the reader's `read`.
def load_as_json
  input = open_input_stream
  JSONReader.new(input).read.tap do |table|
    # Store the input stream on the table as @input.
    table.instance_variable_set(:@input, input)
  end
end
|
186
224
|
end
|
187
225
|
end
|
data/lib/arrow/table-saver.rb
CHANGED
@@ -32,6 +32,29 @@ module Arrow
|
|
32
32
|
end
|
33
33
|
|
34
34
|
# Saves the table by dispatching on the kind of @output: "save_to_uri"
# for a URI and "save_to_file" for everything else.
#
# @return [Object] Whatever the selected "save_to_*" method returns.
# @raise [ArgumentError] If the selected "save_to_*" method is not
#   defined on this object. The message lists the available ones.
def save
  custom_save_method = @output.is_a?(URI) ? "save_to_uri" : "save_to_file"
  # Private methods count as available too.
  return __send__(custom_save_method) if respond_to?(custom_save_method, true)

  prefix = /\Asave_to_/
  candidate_names = (methods(true) | private_methods(true)).collect(&:to_s)
  available_schemes = candidate_names.select do |candidate_name|
    candidate_name =~ prefix
  end
  available_schemes = available_schemes.collect do |candidate_name|
    candidate_name.sub(prefix, "")
  end
  raise ArgumentError,
        "Arrow::Table save source must be one of [" +
        "#{available_schemes.join(", ")}]: #{@output.scheme.inspect}"
end
|
55
|
+
|
56
|
+
private
|
57
|
+
def save_to_file
|
35
58
|
format = @options[:format]
|
36
59
|
custom_save_method = "save_as_#{format}"
|
37
60
|
unless respond_to?(custom_save_method, true)
|
@@ -57,21 +80,24 @@ module Arrow
|
|
57
80
|
end
|
58
81
|
end
|
59
82
|
|
60
|
-
private
|
61
83
|
def fill_options
|
62
84
|
if @options[:format] and @options.key?(:compression)
|
63
85
|
return
|
64
86
|
end
|
65
87
|
|
66
|
-
|
88
|
+
case @output
|
89
|
+
when Buffer
|
67
90
|
info = {}
|
91
|
+
when URI
|
92
|
+
extension = PathExtension.new(@output.path)
|
93
|
+
info = extension.extract
|
68
94
|
else
|
69
95
|
extension = PathExtension.new(@output)
|
70
96
|
info = extension.extract
|
71
97
|
end
|
72
98
|
format = info[:format]
|
73
99
|
@options = @options.dup
|
74
|
-
if format
|
100
|
+
if format
|
75
101
|
@options[:format] ||= format.to_sym
|
76
102
|
else
|
77
103
|
@options[:format] ||= :arrow
|
@@ -21,51 +21,27 @@ module Arrow
|
|
21
21
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
22
22
|
class TableTableFormatter < TableFormatter
|
23
23
|
private
|
24
|
-
def format_header(text,
|
25
|
-
|
24
|
+
def format_header(text, column_formatters)
|
25
|
+
column_formatters.each do |column_formatter|
|
26
26
|
text << "\t"
|
27
|
-
text <<
|
27
|
+
text << column_formatter.aligned_name
|
28
28
|
end
|
29
29
|
text << "\n"
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
def format_column_name(column)
|
34
|
-
case column.data_type
|
35
|
-
when TimestampDataType
|
36
|
-
"%*s" % [::Time.now.iso8601.size, column.name]
|
37
|
-
when FloatDataType, DoubleDataType
|
38
|
-
"%*s" % [FLOAT_N_DIGITS, column.name]
|
39
|
-
else
|
40
|
-
column.name
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def format_rows(text, columns, rows, n_digits, start_offset)
|
32
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
45
33
|
rows.each_with_index do |row, nth_row|
|
46
34
|
text << ("%*d" % [n_digits, start_offset + nth_row])
|
47
35
|
row.each_with_index do |column_value, nth_column|
|
48
36
|
text << "\t"
|
49
|
-
|
50
|
-
|
37
|
+
column_formatter = column_formatters[nth_column]
|
38
|
+
aligned_name = column_formatter.aligned_name
|
39
|
+
text << column_formatter.format_value(column_value, aligned_name.size)
|
51
40
|
end
|
52
41
|
text << "\n"
|
53
42
|
end
|
54
43
|
end
|
55
44
|
|
56
|
-
def format_column_value(column, value)
|
57
|
-
case value
|
58
|
-
when ::Time
|
59
|
-
value.iso8601
|
60
|
-
when Float
|
61
|
-
"%*f" % [[column.name.size, FLOAT_N_DIGITS].max, value]
|
62
|
-
when Integer
|
63
|
-
"%*d" % [column.name.size, value]
|
64
|
-
else
|
65
|
-
"%-*s" % [column.name.size, value.to_s]
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
45
|
def format_ellipsis(text)
|
70
46
|
text << "...\n"
|
71
47
|
end
|
data/lib/arrow/table.rb
CHANGED
@@ -195,8 +195,6 @@ module Arrow
|
|
195
195
|
alias_method :size, :n_rows
|
196
196
|
alias_method :length, :n_rows
|
197
197
|
|
198
|
-
alias_method :[], :find_column
|
199
|
-
|
200
198
|
alias_method :slice_raw, :slice
|
201
199
|
|
202
200
|
# @overload slice(offset, length)
|
@@ -236,6 +234,12 @@ module Arrow
|
|
236
234
|
# @return [Arrow::Table]
|
237
235
|
# The sub `Arrow::Table` that covers only rows of the range of indices.
|
238
236
|
#
|
237
|
+
# @overload slice(conditions)
|
238
|
+
#
|
239
|
+
# @param conditions [Hash] The conditions to select records.
|
240
|
+
# @return [Arrow::Table]
|
241
|
+
# The sub `Arrow::Table` that covers only rows matched by condition
|
242
|
+
#
|
239
243
|
# @overload slice
|
240
244
|
#
|
241
245
|
# @yield [slicer] Gives slicer that constructs condition to select records.
|
@@ -263,12 +267,37 @@ module Arrow
|
|
263
267
|
expected_n_args = nil
|
264
268
|
case args.size
|
265
269
|
when 1
|
266
|
-
|
270
|
+
case args[0]
|
271
|
+
when Integer
|
267
272
|
index = args[0]
|
268
273
|
index += n_rows if index < 0
|
269
274
|
return nil if index < 0
|
270
275
|
return nil if index >= n_rows
|
271
276
|
return Record.new(self, index)
|
277
|
+
when Hash
|
278
|
+
condition_pairs = args[0]
|
279
|
+
slicer = Slicer.new(self)
|
280
|
+
conditions = []
|
281
|
+
condition_pairs.each do |key, value|
|
282
|
+
case value
|
283
|
+
when Range
|
284
|
+
# TODO: Optimize "begin <= key <= end" case by missing "between" kernel
|
285
|
+
# https://issues.apache.org/jira/browse/ARROW-9843
|
286
|
+
unless value.begin.nil?
|
287
|
+
conditions << (slicer[key] >= value.begin)
|
288
|
+
end
|
289
|
+
unless value.end.nil?
|
290
|
+
if value.exclude_end?
|
291
|
+
conditions << (slicer[key] < value.end)
|
292
|
+
else
|
293
|
+
conditions << (slicer[key] <= value.end)
|
294
|
+
end
|
295
|
+
end
|
296
|
+
else
|
297
|
+
conditions << (slicer[key] == value)
|
298
|
+
end
|
299
|
+
end
|
300
|
+
slicers << conditions.inject(:&)
|
272
301
|
else
|
273
302
|
slicers << args[0]
|
274
303
|
end
|
@@ -397,41 +426,6 @@ module Arrow
|
|
397
426
|
remove_column_raw(index)
|
398
427
|
end
|
399
428
|
|
400
|
-
# TODO
|
401
|
-
#
|
402
|
-
# @return [Arrow::Table]
|
403
|
-
def select_columns(*selectors, &block)
|
404
|
-
if selectors.empty?
|
405
|
-
return to_enum(__method__) unless block_given?
|
406
|
-
selected_columns = columns.select(&block)
|
407
|
-
else
|
408
|
-
selected_columns = []
|
409
|
-
selectors.each do |selector|
|
410
|
-
case selector
|
411
|
-
when String, Symbol
|
412
|
-
column = find_column(selector)
|
413
|
-
if column.nil?
|
414
|
-
message = "unknown column: #{selector.inspect}: #{inspect}"
|
415
|
-
raise KeyError.new(message)
|
416
|
-
end
|
417
|
-
selected_columns << column
|
418
|
-
when Range
|
419
|
-
selected_columns.concat(columns[selector])
|
420
|
-
else
|
421
|
-
column = columns[selector]
|
422
|
-
if column.nil?
|
423
|
-
message = "out of index (0..#{n_columns - 1}): " +
|
424
|
-
"#{selector.inspect}: #{inspect}"
|
425
|
-
raise IndexError.new(message)
|
426
|
-
end
|
427
|
-
selected_columns << column
|
428
|
-
end
|
429
|
-
end
|
430
|
-
selected_columns = selected_columns.select(&block) if block_given?
|
431
|
-
end
|
432
|
-
self.class.new(selected_columns)
|
433
|
-
end
|
434
|
-
|
435
429
|
# Experimental
|
436
430
|
def group(*keys)
|
437
431
|
Group.new(self, keys)
|
@@ -442,8 +436,8 @@ module Arrow
|
|
442
436
|
RollingWindow.new(self, size)
|
443
437
|
end
|
444
438
|
|
445
|
-
def save(
|
446
|
-
saver = TableSaver.new(self,
|
439
|
+
def save(output, options={})
|
440
|
+
saver = TableSaver.new(self, output, options)
|
447
441
|
saver.save
|
448
442
|
end
|
449
443
|
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
#
|
30
30
|
# The unit must be second or millisecond.
|
31
31
|
#
|
32
|
-
# @example Create a time32 data type with
|
32
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
33
33
|
# Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
|
34
34
|
#
|
35
35
|
# @example Create a time32 data type with Symbol
|
@@ -45,7 +45,7 @@ module Arrow
|
|
45
45
|
#
|
46
46
|
# The unit must be second or millisecond.
|
47
47
|
#
|
48
|
-
# @example Create a time32 data type with
|
48
|
+
# @example Create a time32 data type with Arrow::TimeUnit
|
49
49
|
# Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
|
50
50
|
#
|
51
51
|
# @example Create a time32 data type with Symbol
|