red-arrow 16.0.0 → 17.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arrow/field-containable.rb +1 -1
- data/lib/arrow/table-formatter.rb +33 -7
- data/lib/arrow/table-list-formatter.rb +3 -3
- data/lib/arrow/table-table-formatter.rb +7 -0
- data/lib/arrow/version.rb +1 -1
- data/test/test-csv-loader.rb +40 -32
- data/test/test-group.rb +13 -0
- data/test/test-schema.rb +1 -1
- data/test/test-slicer.rb +191 -154
- data/test/test-struct-data-type.rb +1 -1
- data/test/test-table.rb +305 -230
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 219ff5131490e19062c8ec0813d2478182b0e4804fd3b574cfae56c684fe14a2
|
4
|
+
data.tar.gz: e3a579210e075a7bf2bae488ab59d4c764efdddf6a684d2ec3bf85d121f0aca3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed735f057c60826172842e0f9a24e23ac950e8a972bb5e3b9d0ab0b5dc16ee87e4996a5f0552cb97e70f1e134c882372110c488945ad8d12e548c55234e40d5b
|
7
|
+
data.tar.gz: ae524564569d6889e2cde7616f5e937490c30ae55a4c3e362af4af0ef4444f2062fb3910164c55ba29cb4d34c3f158abb478d9d8332024bf85a68beff8f069e8
|
@@ -29,7 +29,7 @@ module Arrow
|
|
29
29
|
return nil if index < 0 or index >= n_fields
|
30
30
|
get_field(index)
|
31
31
|
else
|
32
|
-
message = "field name or index must be String, Symbol or Integer"
|
32
|
+
message = +"field name or index must be String, Symbol or Integer"
|
33
33
|
message << ": <#{name_or_index.inspect}>"
|
34
34
|
raise ArgumentError, message
|
35
35
|
end
|
@@ -24,7 +24,8 @@ module Arrow
|
|
24
24
|
attr_reader :head_values
|
25
25
|
attr_reader :tail_values
|
26
26
|
attr_reader :sample_values
|
27
|
-
def initialize(column, head_values, tail_values)
|
27
|
+
def initialize(table_formatter, column, head_values, tail_values)
|
28
|
+
@table_formatter = table_formatter
|
28
29
|
@column = column
|
29
30
|
@head_values = head_values
|
30
31
|
@tail_values = tail_values
|
@@ -36,6 +37,15 @@ module Arrow
|
|
36
37
|
@data_type ||= @column.data_type
|
37
38
|
end
|
38
39
|
|
40
|
+
def formatted_data_type_name
|
41
|
+
@formatted_data_type_name ||= "(#{data_type.name})"
|
42
|
+
end
|
43
|
+
|
44
|
+
def aligned_data_type_name
|
45
|
+
@aligned_data_type_name ||=
|
46
|
+
"%*s" % [aligned_name.size, formatted_data_type_name]
|
47
|
+
end
|
48
|
+
|
39
49
|
def name
|
40
50
|
@name ||= @column.name
|
41
51
|
end
|
@@ -63,7 +73,7 @@ module Arrow
|
|
63
73
|
formatted_value = format_value(value[field_name], field_value_width)
|
64
74
|
"#{formatted_name}: #{formatted_value}"
|
65
75
|
end
|
66
|
-
formatted = "{"
|
76
|
+
formatted = +"{"
|
67
77
|
formatted << formatted_values.join(", ")
|
68
78
|
formatted << "}"
|
69
79
|
"%-*s" % [width, formatted]
|
@@ -90,9 +100,16 @@ module Arrow
|
|
90
100
|
end
|
91
101
|
|
92
102
|
def format_aligned_name(name, data_type, sample_values)
|
103
|
+
if @table_formatter.show_column_type?
|
104
|
+
min_width = formatted_data_type_name.size
|
105
|
+
else
|
106
|
+
min_width = 0
|
107
|
+
end
|
93
108
|
case data_type
|
94
109
|
when TimestampDataType
|
95
|
-
|
110
|
+
width = ::Time.now.iso8601.size
|
111
|
+
width = min_width if width < min_width
|
112
|
+
"%*s" % [width, name]
|
96
113
|
when IntegerDataType
|
97
114
|
have_null = false
|
98
115
|
have_negative = false
|
@@ -118,9 +135,12 @@ module Arrow
|
|
118
135
|
end
|
119
136
|
width += 1 if have_negative # Need "-"
|
120
137
|
width = [width, FORMATTED_NULL.size].max if have_null
|
138
|
+
width = min_width if width < min_width
|
121
139
|
"%*s" % [width, name]
|
122
140
|
when FloatDataType, DoubleDataType
|
123
|
-
|
141
|
+
width = FLOAT_N_DIGITS
|
142
|
+
width = min_width if width < min_width
|
143
|
+
"%*s" % [width, name]
|
124
144
|
when StructDataType
|
125
145
|
field_widths = data_type.fields.collect do |field|
|
126
146
|
field_value_width = compute_field_value_width(field, sample_values)
|
@@ -130,9 +150,11 @@ module Arrow
|
|
130
150
|
if field_widths.size > 0
|
131
151
|
width += (", ".size * (field_widths.size - 1))
|
132
152
|
end
|
153
|
+
width = min_width if width < min_width
|
133
154
|
"%*s" % [width, name]
|
134
155
|
else
|
135
|
-
|
156
|
+
width = min_width
|
157
|
+
"%*s" % [width, name]
|
136
158
|
end
|
137
159
|
end
|
138
160
|
end
|
@@ -143,7 +165,7 @@ module Arrow
|
|
143
165
|
end
|
144
166
|
|
145
167
|
def format
|
146
|
-
text = ""
|
168
|
+
text = +""
|
147
169
|
n_rows = @table.n_rows
|
148
170
|
border = @options[:border] || 10
|
149
171
|
|
@@ -159,7 +181,7 @@ module Arrow
|
|
159
181
|
else
|
160
182
|
tail_values = []
|
161
183
|
end
|
162
|
-
ColumnFormatter.new(column, head_values, tail_values)
|
184
|
+
ColumnFormatter.new(self, column, head_values, tail_values)
|
163
185
|
end
|
164
186
|
|
165
187
|
format_header(text, column_formatters)
|
@@ -186,5 +208,9 @@ module Arrow
|
|
186
208
|
|
187
209
|
text
|
188
210
|
end
|
211
|
+
|
212
|
+
def show_column_type?
|
213
|
+
@options.fetch(:show_column_type, true)
|
214
|
+
end
|
189
215
|
end
|
190
216
|
end
|
@@ -27,9 +27,9 @@ module Arrow
|
|
27
27
|
text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
|
28
28
|
row.each_with_index do |column_value, nth_column|
|
29
29
|
column_formatter = column_formatters[nth_column]
|
30
|
-
|
31
|
-
|
32
|
-
text << "
|
30
|
+
text << column_formatter.name
|
31
|
+
text << "(#{column_formatter.data_type.name})" if show_column_type?
|
32
|
+
text << ": #{column_formatter.format_value(column_value)}\n"
|
33
33
|
end
|
34
34
|
end
|
35
35
|
end
|
@@ -26,6 +26,13 @@ module Arrow
|
|
26
26
|
text << "\t"
|
27
27
|
text << column_formatter.aligned_name
|
28
28
|
end
|
29
|
+
if show_column_type?
|
30
|
+
text << "\n"
|
31
|
+
column_formatters.each do |column_formatter|
|
32
|
+
text << "\t"
|
33
|
+
text << column_formatter.aligned_data_type_name
|
34
|
+
end
|
35
|
+
end
|
29
36
|
text << "\n"
|
30
37
|
end
|
31
38
|
|
data/lib/arrow/version.rb
CHANGED
data/test/test-csv-loader.rb
CHANGED
@@ -27,80 +27,88 @@ class CSVLoaderTest < Test::Unit::TestCase
|
|
27
27
|
test("String: data: with header") do
|
28
28
|
data = fixture_path("with-header-float.csv").read
|
29
29
|
assert_equal(<<-TABLE, load_csv(data).to_s)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
name score
|
31
|
+
(utf8) (double)
|
32
|
+
0 alice 10.100000
|
33
|
+
1 bob 29.200000
|
34
|
+
2 chris -1.300000
|
34
35
|
TABLE
|
35
36
|
end
|
36
37
|
|
37
38
|
test("String: data: without header") do
|
38
39
|
data = fixture_path("without-header-float.csv").read
|
39
40
|
assert_equal(<<-TABLE, load_csv(data).to_s)
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
0 1
|
42
|
+
(utf8) (double)
|
43
|
+
0 alice 10.100000
|
44
|
+
1 bob 29.200000
|
45
|
+
2 chris -1.300000
|
44
46
|
TABLE
|
45
47
|
end
|
46
48
|
|
47
49
|
test("String: path: with header") do
|
48
50
|
path = fixture_path("with-header-float.csv").to_s
|
49
51
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
52
|
+
name score
|
53
|
+
(utf8) (double)
|
54
|
+
0 alice 10.100000
|
55
|
+
1 bob 29.200000
|
56
|
+
2 chris -1.300000
|
54
57
|
TABLE
|
55
58
|
end
|
56
59
|
|
57
60
|
test("String: path: without header") do
|
58
61
|
path = fixture_path("without-header-float.csv").to_s
|
59
62
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
63
|
+
0 1
|
64
|
+
(utf8) (double)
|
65
|
+
0 alice 10.100000
|
66
|
+
1 bob 29.200000
|
67
|
+
2 chris -1.300000
|
64
68
|
TABLE
|
65
69
|
end
|
66
70
|
|
67
71
|
test("Pathname: with header") do
|
68
72
|
path = fixture_path("with-header-float.csv")
|
69
73
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
+
name score
|
75
|
+
(utf8) (double)
|
76
|
+
0 alice 10.100000
|
77
|
+
1 bob 29.200000
|
78
|
+
2 chris -1.300000
|
74
79
|
TABLE
|
75
80
|
end
|
76
81
|
|
77
82
|
test("Pathname: without header") do
|
78
83
|
path = fixture_path("without-header-float.csv")
|
79
84
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
0 1
|
86
|
+
(utf8) (double)
|
87
|
+
0 alice 10.100000
|
88
|
+
1 bob 29.200000
|
89
|
+
2 chris -1.300000
|
84
90
|
TABLE
|
85
91
|
end
|
86
92
|
|
87
93
|
test("null: with double quote") do
|
88
94
|
path = fixture_path("null-with-double-quote.csv").to_s
|
89
95
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
96
|
+
name score
|
97
|
+
(utf8) (int8)
|
98
|
+
0 alice 10
|
99
|
+
1 bob (null)
|
100
|
+
2 chris -1
|
94
101
|
TABLE
|
95
102
|
end
|
96
103
|
|
97
104
|
test("null: without double quote") do
|
98
105
|
path = fixture_path("null-without-double-quote.csv").to_s
|
99
106
|
assert_equal(<<-TABLE, load_csv(path).to_s)
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
107
|
+
name score
|
108
|
+
(utf8) (int8)
|
109
|
+
0 alice 10
|
110
|
+
1 bob (null)
|
111
|
+
2 chris -1
|
104
112
|
TABLE
|
105
113
|
end
|
106
114
|
|
data/test/test-group.rb
CHANGED
@@ -43,6 +43,7 @@ class GroupTest < Test::Unit::TestCase
|
|
43
43
|
table = Arrow::Table.new(raw_table)
|
44
44
|
assert_equal(<<-TABLE, table.group(:time).count.to_s)
|
45
45
|
time count(int)
|
46
|
+
(timestamp) (int64)
|
46
47
|
0 #{time_values[0].iso8601} 1
|
47
48
|
1 #{time_values[1].iso8601} 1
|
48
49
|
TABLE
|
@@ -53,6 +54,7 @@ class GroupTest < Test::Unit::TestCase
|
|
53
54
|
test("single") do
|
54
55
|
assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s)
|
55
56
|
group_key1 count(group_key2) count(int) count(uint) count(float) count(string)
|
57
|
+
(uint8) (int64) (int64) (int64) (int64) (int64)
|
56
58
|
0 1 2 2 1 1 2
|
57
59
|
1 2 1 0 1 1 1
|
58
60
|
2 3 3 3 3 3 2
|
@@ -62,6 +64,7 @@ class GroupTest < Test::Unit::TestCase
|
|
62
64
|
test("multiple") do
|
63
65
|
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s)
|
64
66
|
group_key1 group_key2 count(int) count(uint) count(float) count(string)
|
67
|
+
(uint8) (uint8) (int64) (int64) (int64) (int64)
|
65
68
|
0 1 1 2 1 1 2
|
66
69
|
1 2 1 0 1 1 1
|
67
70
|
2 3 1 1 1 1 0
|
@@ -73,6 +76,7 @@ class GroupTest < Test::Unit::TestCase
|
|
73
76
|
group = @table.group(:group_key1, :group_key2)
|
74
77
|
assert_equal(<<-TABLE, group.count(:int, :uint).to_s)
|
75
78
|
group_key1 group_key2 count(int) count(uint)
|
79
|
+
(uint8) (uint8) (int64) (int64)
|
76
80
|
0 1 1 2 1
|
77
81
|
1 2 1 0 1
|
78
82
|
2 3 1 1 1
|
@@ -85,6 +89,7 @@ class GroupTest < Test::Unit::TestCase
|
|
85
89
|
test("single") do
|
86
90
|
assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s)
|
87
91
|
group_key1 sum(group_key2) sum(int) sum(uint) sum(float)
|
92
|
+
(uint8) (uint64) (int64) (uint64) (double)
|
88
93
|
0 1 2 -3 1 2.200000
|
89
94
|
1 2 1 (null) 3 3.300000
|
90
95
|
2 3 5 -15 15 16.500000
|
@@ -94,6 +99,7 @@ class GroupTest < Test::Unit::TestCase
|
|
94
99
|
test("multiple") do
|
95
100
|
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s)
|
96
101
|
group_key1 group_key2 sum(int) sum(uint) sum(float)
|
102
|
+
(uint8) (uint8) (int64) (uint64) (double)
|
97
103
|
0 1 1 -3 1 2.200000
|
98
104
|
1 2 1 (null) 3 3.300000
|
99
105
|
2 3 1 -4 4 4.400000
|
@@ -106,6 +112,7 @@ class GroupTest < Test::Unit::TestCase
|
|
106
112
|
test("single") do
|
107
113
|
assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s)
|
108
114
|
group_key1 mean(group_key2) mean(int) mean(uint) mean(float)
|
115
|
+
(uint8) (double) (double) (double) (double)
|
109
116
|
0 1 1.000000 -1.500000 1.000000 2.200000
|
110
117
|
1 2 1.000000 (null) 3.000000 3.300000
|
111
118
|
2 3 1.666667 -5.000000 5.000000 5.500000
|
@@ -115,6 +122,7 @@ class GroupTest < Test::Unit::TestCase
|
|
115
122
|
test("multiple") do
|
116
123
|
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s)
|
117
124
|
group_key1 group_key2 mean(int) mean(uint) mean(float)
|
125
|
+
(uint8) (uint8) (double) (double) (double)
|
118
126
|
0 1 1 -1.500000 1.000000 2.200000
|
119
127
|
1 2 1 (null) 3.000000 3.300000
|
120
128
|
2 3 1 -4.000000 4.000000 4.400000
|
@@ -127,6 +135,7 @@ class GroupTest < Test::Unit::TestCase
|
|
127
135
|
test("single") do
|
128
136
|
assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s)
|
129
137
|
group_key1 min(group_key2) min(int) min(uint) min(float)
|
138
|
+
(uint8) (uint8) (int32) (uint32) (float)
|
130
139
|
0 1 1 -2 1 2.200000
|
131
140
|
1 2 1 (null) 3 3.300000
|
132
141
|
2 3 1 -6 4 4.400000
|
@@ -136,6 +145,7 @@ class GroupTest < Test::Unit::TestCase
|
|
136
145
|
test("multiple") do
|
137
146
|
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s)
|
138
147
|
group_key1 group_key2 min(int) min(uint) min(float)
|
148
|
+
(uint8) (uint8) (int32) (uint32) (float)
|
139
149
|
0 1 1 -2 1 2.200000
|
140
150
|
1 2 1 (null) 3 3.300000
|
141
151
|
2 3 1 -4 4 4.400000
|
@@ -148,6 +158,7 @@ class GroupTest < Test::Unit::TestCase
|
|
148
158
|
test("single") do
|
149
159
|
assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s)
|
150
160
|
group_key1 max(group_key2) max(int) max(uint) max(float)
|
161
|
+
(uint8) (uint8) (int32) (uint32) (float)
|
151
162
|
0 1 1 -1 1 2.200000
|
152
163
|
1 2 1 (null) 3 3.300000
|
153
164
|
2 3 2 -4 6 6.600000
|
@@ -157,6 +168,7 @@ class GroupTest < Test::Unit::TestCase
|
|
157
168
|
test("multiple") do
|
158
169
|
assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s)
|
159
170
|
group_key1 group_key2 max(int) max(uint) max(float)
|
171
|
+
(uint8) (uint8) (int32) (uint32) (float)
|
160
172
|
0 1 1 -1 1 2.200000
|
161
173
|
1 2 1 (null) 3 3.300000
|
162
174
|
2 3 1 -4 4 4.400000
|
@@ -170,6 +182,7 @@ class GroupTest < Test::Unit::TestCase
|
|
170
182
|
group = @table.group(:group_key1, :group_key2)
|
171
183
|
assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s)
|
172
184
|
group_key1 group_key2 count(int) sum(uint)
|
185
|
+
(uint8) (uint8) (int64) (uint64)
|
173
186
|
0 1 1 2 1
|
174
187
|
1 2 1 0 3
|
175
188
|
2 3 1 1 4
|
data/test/test-schema.rb
CHANGED
@@ -95,7 +95,7 @@ class SchemaTest < Test::Unit::TestCase
|
|
95
95
|
|
96
96
|
test("[invalid]") do
|
97
97
|
invalid = []
|
98
|
-
message = "field name or index must be String, Symbol or Integer"
|
98
|
+
message = +"field name or index must be String, Symbol or Integer"
|
99
99
|
message << ": <#{invalid.inspect}>"
|
100
100
|
assert_raise(ArgumentError.new(message)) do
|
101
101
|
@schema[invalid]
|