red-arrow 4.0.0 → 6.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +12 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/datum.rb +100 -0
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +44 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +34 -40
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +34 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +132 -73
data/lib/arrow/scalar.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Scalar
|
20
|
+
# @param other [Arrow::Scalar] The scalar to be compared.
|
21
|
+
# @param options [Arrow::EqualOptions, Hash] (nil)
|
22
|
+
# The options to customize how to compare.
|
23
|
+
#
|
24
|
+
# @return [Boolean]
|
25
|
+
# `true` if both of them have the same data, `false` otherwise.
|
26
|
+
#
|
27
|
+
# @since 5.0.0
|
28
|
+
def equal_scalar?(other, options=nil)
|
29
|
+
equal_options(other, options)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/arrow/slicer.rb
CHANGED
@@ -16,9 +16,6 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
module Arrow
|
19
|
-
# Experimental
|
20
|
-
#
|
21
|
-
# TODO: Almost codes should be implemented in Apache Arrow C++.
|
22
19
|
class Slicer
|
23
20
|
def initialize(table)
|
24
21
|
@table = table
|
@@ -43,6 +40,21 @@ module Arrow
|
|
43
40
|
super
|
44
41
|
end
|
45
42
|
|
43
|
+
module Helper
  # Returns boolean data for +column+.
  #
  # When the column's data type is an Arrow::BooleanDataType its data is
  # returned untouched. Otherwise the "cast" function is executed on the
  # column's data with cast options whose target data type is
  # Arrow::BooleanDataType, and the value of that result is returned.
  #
  # @param column [#data_type, #data] The column to read data from.
  #
  # @return The column's data, or the value of the "cast" result.
  def self.ensure_boolean(column)
    return column.data if Arrow::BooleanDataType === column.data_type

    cast_options = CastOptions.new
    cast_options.to_data_type = Arrow::BooleanDataType.new
    cast_function = Function.find("cast")
    cast_function.execute([column.data], cast_options).value
  end
end
|
57
|
+
|
46
58
|
class Condition
|
47
59
|
def evaluate
|
48
60
|
message = "Slicer::Condition must define \#evaluate: #{inspect}"
|
@@ -69,43 +81,28 @@ module Arrow
|
|
69
81
|
end
|
70
82
|
|
71
83
|
def evaluate
|
72
|
-
|
73
|
-
values2 = @condition2.evaluate.each
|
74
|
-
raw_array = []
|
75
|
-
begin
|
76
|
-
loop do
|
77
|
-
value1 = values1.next
|
78
|
-
value2 = values2.next
|
79
|
-
if value1.nil? or value2.nil?
|
80
|
-
raw_array << nil
|
81
|
-
else
|
82
|
-
raw_array << evaluate_value(value1, value2)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
rescue StopIteration
|
86
|
-
end
|
87
|
-
BooleanArray.new(raw_array)
|
84
|
+
function.execute([@condition1.evaluate, @condition2.evaluate]).value
|
88
85
|
end
|
89
86
|
end
|
90
87
|
|
91
88
|
class AndCondition < LogicalCondition
|
92
89
|
private
|
93
|
-
def
|
94
|
-
|
90
|
+
def function
|
91
|
+
Function.find("and")
|
95
92
|
end
|
96
93
|
end
|
97
94
|
|
98
95
|
class OrCondition < LogicalCondition
|
99
96
|
private
|
100
|
-
def
|
101
|
-
|
97
|
+
def function
|
98
|
+
Function.find("or")
|
102
99
|
end
|
103
100
|
end
|
104
101
|
|
105
102
|
class XorCondition < LogicalCondition
|
106
103
|
private
|
107
|
-
def
|
108
|
-
|
104
|
+
def function
|
105
|
+
Function.find("xor")
|
109
106
|
end
|
110
107
|
end
|
111
108
|
|
@@ -115,21 +112,7 @@ module Arrow
|
|
115
112
|
end
|
116
113
|
|
117
114
|
def evaluate
|
118
|
-
|
119
|
-
|
120
|
-
case @column.data_type
|
121
|
-
when BooleanDataType
|
122
|
-
data
|
123
|
-
else
|
124
|
-
if data.n_chunks == 1
|
125
|
-
data.get_chunk(0).cast(BooleanDataType.new, nil)
|
126
|
-
else
|
127
|
-
arrays = data.each_chunk.collect do |chunk|
|
128
|
-
chunk.cast(BooleanDataType.new, nil)
|
129
|
-
end
|
130
|
-
ChunkedArray.new(arrays)
|
131
|
-
end
|
132
|
-
end
|
115
|
+
Helper.ensure_boolean(@column)
|
133
116
|
end
|
134
117
|
|
135
118
|
def !@
|
@@ -187,23 +170,8 @@ module Arrow
|
|
187
170
|
end
|
188
171
|
|
189
172
|
def evaluate
|
190
|
-
data = @column
|
191
|
-
|
192
|
-
data.each_chunk do |chunk|
|
193
|
-
if chunk.is_a?(BooleanArray)
|
194
|
-
boolean_array = chunk
|
195
|
-
else
|
196
|
-
boolean_array = chunk.cast(BooleanDataType.new, nil)
|
197
|
-
end
|
198
|
-
boolean_array.each do |value|
|
199
|
-
if value.nil?
|
200
|
-
raw_array << value
|
201
|
-
else
|
202
|
-
raw_array << !value
|
203
|
-
end
|
204
|
-
end
|
205
|
-
end
|
206
|
-
BooleanArray.new(raw_array)
|
173
|
+
data = Helper.ensure_boolean(@column)
|
174
|
+
Function.find("invert").execute([data]).value
|
207
175
|
end
|
208
176
|
|
209
177
|
def !@
|
@@ -222,19 +190,10 @@ module Arrow
|
|
222
190
|
end
|
223
191
|
|
224
192
|
def evaluate
|
225
|
-
|
226
|
-
|
227
|
-
raw_array = @column.collect(&:nil?)
|
228
|
-
BooleanArray.new(raw_array)
|
193
|
+
if @value.nil?
|
194
|
+
Function.find("is_null").execute([@column.data]).value
|
229
195
|
else
|
230
|
-
|
231
|
-
if value.nil?
|
232
|
-
nil
|
233
|
-
else
|
234
|
-
@value == value
|
235
|
-
end
|
236
|
-
end
|
237
|
-
BooleanArray.new(raw_array)
|
196
|
+
Function.find("equal").execute([@column.data, @value]).value
|
238
197
|
end
|
239
198
|
end
|
240
199
|
end
|
@@ -250,25 +209,10 @@ module Arrow
|
|
250
209
|
end
|
251
210
|
|
252
211
|
def evaluate
|
253
|
-
|
254
|
-
|
255
|
-
if @column.n_nulls.zero?
|
256
|
-
raw_array = [true] * @column.n_rows
|
257
|
-
else
|
258
|
-
raw_array = @column.n_rows.times.collect do |i|
|
259
|
-
@column.valid?(i)
|
260
|
-
end
|
261
|
-
end
|
262
|
-
BooleanArray.new(raw_array)
|
212
|
+
if @value.nil?
|
213
|
+
Function.find("is_valid").execute([@column.data]).value
|
263
214
|
else
|
264
|
-
|
265
|
-
if value.nil?
|
266
|
-
nil
|
267
|
-
else
|
268
|
-
@value != value
|
269
|
-
end
|
270
|
-
end
|
271
|
-
BooleanArray.new(raw_array)
|
215
|
+
Function.find("not_equal").execute([@column.data, @value]).value
|
272
216
|
end
|
273
217
|
end
|
274
218
|
end
|
@@ -284,14 +228,7 @@ module Arrow
|
|
284
228
|
end
|
285
229
|
|
286
230
|
def evaluate
|
287
|
-
|
288
|
-
if value.nil?
|
289
|
-
nil
|
290
|
-
else
|
291
|
-
@value > value
|
292
|
-
end
|
293
|
-
end
|
294
|
-
BooleanArray.new(raw_array)
|
231
|
+
Function.find("less").execute([@column.data, @value]).value
|
295
232
|
end
|
296
233
|
end
|
297
234
|
|
@@ -306,14 +243,7 @@ module Arrow
|
|
306
243
|
end
|
307
244
|
|
308
245
|
def evaluate
|
309
|
-
|
310
|
-
if value.nil?
|
311
|
-
nil
|
312
|
-
else
|
313
|
-
@value >= value
|
314
|
-
end
|
315
|
-
end
|
316
|
-
BooleanArray.new(raw_array)
|
246
|
+
Function.find("less_equal").execute([@column.data, @value]).value
|
317
247
|
end
|
318
248
|
end
|
319
249
|
|
@@ -328,14 +258,7 @@ module Arrow
|
|
328
258
|
end
|
329
259
|
|
330
260
|
def evaluate
|
331
|
-
|
332
|
-
if value.nil?
|
333
|
-
nil
|
334
|
-
else
|
335
|
-
@value < value
|
336
|
-
end
|
337
|
-
end
|
338
|
-
BooleanArray.new(raw_array)
|
261
|
+
Function.find("greater").execute([@column.data, @value]).value
|
339
262
|
end
|
340
263
|
end
|
341
264
|
|
@@ -350,14 +273,7 @@ module Arrow
|
|
350
273
|
end
|
351
274
|
|
352
275
|
def evaluate
|
353
|
-
|
354
|
-
if value.nil?
|
355
|
-
nil
|
356
|
-
else
|
357
|
-
@value <= value
|
358
|
-
end
|
359
|
-
end
|
360
|
-
BooleanArray.new(raw_array)
|
276
|
+
Function.find("greater_equal").execute([@column.data, @value]).value
|
361
277
|
end
|
362
278
|
end
|
363
279
|
|
@@ -372,18 +288,10 @@ module Arrow
|
|
372
288
|
end
|
373
289
|
|
374
290
|
def evaluate
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
raw_array = @column.collect do |value|
|
380
|
-
if value.nil?
|
381
|
-
nil
|
382
|
-
else
|
383
|
-
values_index.key?(value)
|
384
|
-
end
|
385
|
-
end
|
386
|
-
BooleanArray.new(raw_array)
|
291
|
+
values = @values
|
292
|
+
values = Array.new(values) unless values.is_a?(Array)
|
293
|
+
options = SetLookupOptions.new(values)
|
294
|
+
Function.find("is_in").execute([@column.data], options).value
|
387
295
|
end
|
388
296
|
end
|
389
297
|
|
@@ -398,18 +306,11 @@ module Arrow
|
|
398
306
|
end
|
399
307
|
|
400
308
|
def evaluate
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
if value.nil?
|
407
|
-
nil
|
408
|
-
else
|
409
|
-
not values_index.key?(value)
|
410
|
-
end
|
411
|
-
end
|
412
|
-
BooleanArray.new(raw_array)
|
309
|
+
values = @values
|
310
|
+
values = Array.new(values) unless values.is_a?(Array)
|
311
|
+
options = SetLookupOptions.new(values)
|
312
|
+
booleans = Function.find("is_in").execute([@column.data], options).value
|
313
|
+
Function.find("invert").execute([booleans]).value
|
413
314
|
end
|
414
315
|
end
|
415
316
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class SourceNodeOptions
  # Builds an instance from +value+ when it is a RecordBatchReader, a
  # RecordBatch or a Table; any other value is rejected.
  #
  # @api private
  #
  # @param value [Object] The candidate to wrap.
  #
  # @return [SourceNodeOptions, nil] `new(value)` for a supported
  #   +value+, `nil` otherwise.
  def self.try_convert(value)
    convertible = RecordBatchReader === value ||
                  RecordBatch === value ||
                  Table === value
    convertible ? new(value) : nil
  end
end
|
32
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class StringDictionaryArrayBuilder
  include SymbolValuesAppendable

  # @return [StringArrayBuilder] A newly created StringArrayBuilder.
  private def create_values_array_builder
    StringArrayBuilder.new
  end
end
|
27
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
module SymbolValuesAppendable
  # Appends +values+ by way of a temporary values array builder obtained
  # from `create_values_array_builder`. Every Symbol element is replaced
  # by its `to_s` form first; all other elements are passed as-is.
  #
  # @param values [#collect] The values to append.
  # @param is_valids [Object, nil] Forwarded unchanged to the temporary
  #   builder's `append_values`.
  #
  # @return The result of `append_array` called with the finished
  #   temporary builder.
  def append_values(values, is_valids=nil)
    values_builder = create_values_array_builder
    stringified = values.collect { |v| Symbol === v ? v.to_s : v }
    values_builder.append_values(stringified, is_valids)
    append_array(values_builder.finish)
  end
end
|
34
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class TableConcatenateOptions
  class << self
    # Builds options from a Hash by calling the `#{key}=` writer for
    # each entry with that entry's value.
    #
    # @api private
    #
    # @param value [Object] The candidate to convert.
    #
    # @return [TableConcatenateOptions, nil] A new instance populated
    #   from +value+ when it is a Hash, `nil` otherwise.
    def try_convert(value)
      case value
      when Hash
        options = new
        value.each do |name, option_value|
          # Pass the entry's own value, not the whole Hash.
          options.public_send("#{name}=", option_value)
        end
        options
      else
        nil
      end
    end
  end
end
|
36
|
+
end
|
@@ -18,6 +18,125 @@
|
|
18
18
|
module Arrow
|
19
19
|
# TODO: Most of this code should be implemented in Apache Arrow C++.
|
20
20
|
class TableFormatter
|
21
|
+
# @private
|
22
|
+
class ColumnFormatter
  # Minimum width used when formatting Float values and the width used
  # for float/double column names.
  FLOAT_N_DIGITS = 10
  # Text shown in place of a nil value.
  FORMATTED_NULL = "(null)"

  attr_reader :column
  attr_reader :head_values
  attr_reader :tail_values
  attr_reader :sample_values

  # @param column [#name, #data_type] The column to format.
  # @param head_values [Array] Values taken from the start of the column.
  # @param tail_values [Array] Values taken from the end of the column.
  #
  # `sample_values` is `head_values + tail_values`; it is what widths
  # are computed from.
  def initialize(column, head_values, tail_values)
    @column = column
    @head_values = head_values
    @tail_values = tail_values
    @sample_values = head_values + tail_values
    # Cache of computed widths, keyed by struct field.
    @field_value_widths = {}
  end

  # @return The column's data type (memoized).
  def data_type
    @data_type ||= @column.data_type
  end

  # @return The column's name (memoized).
  def name
    @name ||= @column.name
  end

  # @return [String] The column name padded according to the column's
  #   data type and sample values (memoized).
  def aligned_name
    @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
  end

  # Formats one value as text.
  #
  # @param value [Object] The value to format. Time, Float, Integer,
  #   Hash and nil get dedicated formats; anything else uses `to_s`.
  # @param width [Integer] The minimum width of the result. It is not
  #   applied to Time values.
  #
  # @return [String] The formatted value.
  def format_value(value, width=0)
    case value
    when ::Time
      value.iso8601
    when Float
      "%*f" % [[width, FLOAT_N_DIGITS].max, value]
    when Integer
      "%*d" % [width, value]
    when Hash
      # Fields come from this column's data type; each field value is
      # padded to the width computed from the sample values.
      formatted_values = data_type.fields.collect do |field|
        field_name = field.name
        field_value_width = compute_field_value_width(field, @sample_values)
        formatted_name = format_value(field_name, 0)
        formatted_value = format_value(value[field_name], field_value_width)
        "#{formatted_name}: #{formatted_value}"
      end
      # Interpolate instead of mutating a string literal with <<.
      formatted = "{#{formatted_values.join(", ")}}"
      "%-*s" % [width, formatted]
    when nil
      "%*s" % [width, FORMATTED_NULL]
    else
      "%-*s" % [width, value.to_s]
    end
  end

  private
  # Computes (and caches per field) the width needed for the values of
  # +field+ found in +sample_values+. nil samples count as having no
  # value for the field.
  def compute_field_value_width(field, sample_values)
    unless @field_value_widths.key?(field)
      field_name = field.name
      field_sample_values = sample_values.collect do |v|
        (v || {})[field_name]
      end
      # Align an empty name: the size of the result is the value width.
      field_aligned_name = format_aligned_name("",
                                               field.data_type,
                                               field_sample_values)
      @field_value_widths[field] = field_aligned_name.size
    end
    @field_value_widths[field]
  end

  # Right-aligns +name+ to the width that values of +data_type+ need.
  # Data types without a dedicated branch return +name+ unchanged.
  def format_aligned_name(name, data_type, sample_values)
    case data_type
    when TimestampDataType
      "%*s" % [::Time.now.iso8601.size, name]
    when IntegerDataType
      have_null = false
      have_negative = false
      max_value = nil
      sample_values.each do |value|
        if value.nil?
          have_null = true
        else
          magnitude = value.abs
          max_value = magnitude if max_value.nil? || magnitude > max_value
          have_negative = true if value.negative?
        end
      end
      # Count digits from the decimal string. Math.log10 goes through
      # Float and over-counts integers that Float cannot represent
      # exactly (e.g. 99_999_999_999_999_999 is treated as 1e17).
      width = max_value.nil? ? 0 : max_value.to_s.size
      width += 1 if have_negative # Need "-"
      width = [width, FORMATTED_NULL.size].max if have_null
      "%*s" % [width, name]
    when FloatDataType, DoubleDataType
      "%*s" % [FLOAT_N_DIGITS, name]
    when StructDataType
      field_widths = data_type.fields.collect do |field|
        field_value_width = compute_field_value_width(field, sample_values)
        field.name.size + ": ".size + field_value_width
      end
      width = "{}".size + field_widths.sum
      if field_widths.size > 0
        width += (", ".size * (field_widths.size - 1))
      end
      "%*s" % [width, name]
    else
      name
    end
  end
end
|
139
|
+
|
21
140
|
def initialize(table, options={})
|
22
141
|
@table = table
|
23
142
|
@options = options
|
@@ -25,38 +144,43 @@ module Arrow
|
|
25
144
|
|
26
145
|
def format
|
27
146
|
text = ""
|
28
|
-
columns = @table.columns
|
29
|
-
format_header(text, columns)
|
30
|
-
|
31
147
|
n_rows = @table.n_rows
|
32
|
-
return text if n_rows.zero?
|
33
|
-
|
34
148
|
border = @options[:border] || 10
|
35
|
-
|
149
|
+
|
36
150
|
head_limit = [border, n_rows].min
|
37
|
-
|
38
|
-
|
151
|
+
|
152
|
+
tail_start = [border, n_rows - border].max
|
153
|
+
tail_limit = n_rows - tail_start
|
154
|
+
|
155
|
+
column_formatters = @table.columns.collect do |column|
|
156
|
+
head_values = column.each.take(head_limit)
|
157
|
+
if tail_limit > 0
|
158
|
+
tail_values = column.reverse_each.take(tail_limit).reverse
|
159
|
+
else
|
160
|
+
tail_values = []
|
161
|
+
end
|
162
|
+
ColumnFormatter.new(column, head_values, tail_values)
|
39
163
|
end
|
164
|
+
|
165
|
+
format_header(text, column_formatters)
|
166
|
+
return text if n_rows.zero?
|
167
|
+
|
168
|
+
n_digits = (Math.log10(n_rows) + 1).truncate
|
40
169
|
format_rows(text,
|
41
|
-
|
42
|
-
|
170
|
+
column_formatters,
|
171
|
+
column_formatters.collect(&:head_values).transpose,
|
43
172
|
n_digits,
|
44
173
|
0)
|
45
174
|
return text if n_rows <= border
|
46
175
|
|
47
|
-
tail_start = [border, n_rows - border].max
|
48
|
-
tail_limit = n_rows - tail_start
|
49
|
-
tail_column_values = columns.collect do |column|
|
50
|
-
column.reverse_each.take(tail_limit).reverse
|
51
|
-
end
|
52
176
|
|
53
177
|
if head_limit != tail_start
|
54
178
|
format_ellipsis(text)
|
55
179
|
end
|
56
180
|
|
57
181
|
format_rows(text,
|
58
|
-
|
59
|
-
|
182
|
+
column_formatters,
|
183
|
+
column_formatters.collect(&:tail_values).transpose,
|
60
184
|
n_digits,
|
61
185
|
tail_start)
|
62
186
|
|
@@ -22,12 +22,14 @@ module Arrow
|
|
22
22
|
def format_header(text, columns)
|
23
23
|
end
|
24
24
|
|
25
|
-
def format_rows(text,
|
25
|
+
def format_rows(text, column_formatters, rows, n_digits, start_offset)
|
26
26
|
rows.each_with_index do |row, nth_row|
|
27
27
|
text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
|
28
28
|
row.each_with_index do |column_value, nth_column|
|
29
|
-
|
30
|
-
|
29
|
+
column_formatter = column_formatters[nth_column]
|
30
|
+
formatted_name = column_formatter.name
|
31
|
+
formatted_value = column_formatter.format_value(column_value)
|
32
|
+
text << "#{formatted_name}: #{formatted_value}\n"
|
31
33
|
end
|
32
34
|
end
|
33
35
|
end
|