red-arrow 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +44 -0
- data/lib/arrow/array-builder.rb +0 -1
- data/lib/arrow/array.rb +9 -1
- data/lib/arrow/chunked-array.rb +43 -7
- data/lib/arrow/column.rb +18 -0
- data/lib/arrow/csv-loader.rb +17 -2
- data/lib/arrow/data-type.rb +81 -0
- data/lib/arrow/group.rb +133 -0
- data/lib/arrow/loader.rb +4 -1
- data/lib/arrow/slicer.rb +76 -19
- data/lib/arrow/table-formatter.rb +21 -47
- data/lib/arrow/table-list-formatter.rb +35 -0
- data/lib/arrow/table-table-formatter.rb +69 -0
- data/lib/arrow/table.rb +62 -40
- data/lib/arrow/version.rb +1 -1
- data/test/fixture/null-with-double-quote.csv +4 -0
- data/test/fixture/null-without-double-quote.csv +4 -0
- data/test/run-test.rb +3 -1
- data/test/test-chunked-array.rb +39 -2
- data/test/test-column.rb +13 -0
- data/test/test-csv-loader.rb +20 -0
- data/test/test-group.rb +111 -0
- data/test/test-slicer.rb +42 -0
- data/test/test-table.rb +52 -10
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5c8c88ef78b27019b54dca4de8c914f17b80d9e
|
4
|
+
data.tar.gz: 85a527d72ea710c9f4c55e4c2db36110693a9f2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b4635c2517a7b750c7f46341bd659504e4d2cbbba6f1d2ef4f32c47c1870de891a7bf009fcd4783284f72b515eaeb2e4846121bf0b66e8eeb017d9ab5f1727c
|
7
|
+
data.tar.gz: 21a10ffe298a91ec56b5354bcc6693348fdb815db04861fc04124849b111ffadc5ddbac2582cf6d87d8f96ec2060179fb2e19866f8f7be2699d547a2e2d861dc
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,49 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.8.2 - 2018-02-04
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Arrow::Table#size`: Added.
|
8
|
+
|
9
|
+
* `Arrow::Table#length`: Added.
|
10
|
+
|
11
|
+
* `Arrow::Table#pack`: Added.
|
12
|
+
|
13
|
+
* `Arrow::Column#pack`: Added.
|
14
|
+
|
15
|
+
* `Arrow::ChunkedArray#pack`: Added.
|
16
|
+
|
17
|
+
* `Arrow::Column#reverse_each`: Added.
|
18
|
+
|
19
|
+
* `Arrow::Table#slice`: Added negative integer support.
|
20
|
+
|
21
|
+
* `Arrow::Slicer::ColumnCondition#in?`: Added.
|
22
|
+
|
23
|
+
* `Arrow::Table#group`: Added.
|
24
|
+
|
25
|
+
* `Arrow::ChunkedArray#null?`: Added.
|
26
|
+
|
27
|
+
* `Arrow::Column#null?`: Added.
|
28
|
+
|
29
|
+
* `Arrow::Group`: Added.
|
30
|
+
|
31
|
+
* `Arrow::CSVLoader`: Changed to treat `""` as a null value instead
|
32
|
+
of an empty string.
|
33
|
+
|
34
|
+
* `Arrow::Table#[]`: Stopped accepting multiple column names.
|
35
|
+
|
36
|
+
* `Arrow::ChunkedArray#valid?`: Added.
|
37
|
+
|
38
|
+
* `Arrow::Column#valid?`: Added.
|
39
|
+
|
40
|
+
* `Arrow::Slicer::ColumnCondition#valid?`: Added.
|
41
|
+
|
42
|
+
### Fixes
|
43
|
+
|
44
|
+
* `Arrow::TableFormatter`: Fixed a bug where too many records were
|
45
|
+
formatted.
|
46
|
+
|
3
47
|
## 0.8.1 - 2018-01-05
|
4
48
|
|
5
49
|
### Improvements
|
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -44,6 +44,14 @@ module Arrow
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
+
def reverse_each
|
48
|
+
return to_enum(__method__) unless block_given?
|
49
|
+
|
50
|
+
(length - 1).downto(0) do |i|
|
51
|
+
yield(self[i])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
47
55
|
def to_arrow
|
48
56
|
self
|
49
57
|
end
|
data/lib/arrow/chunked-array.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -16,9 +16,29 @@ module Arrow
|
|
16
16
|
class ChunkedArray
|
17
17
|
include Enumerable
|
18
18
|
|
19
|
+
alias_method :chunks_raw, :chunks
|
20
|
+
def chunks
|
21
|
+
@chunks ||= chunks_raw
|
22
|
+
end
|
23
|
+
|
24
|
+
def null?(i)
|
25
|
+
chunks.each do |array|
|
26
|
+
return array.null?(i) if i < array.length
|
27
|
+
i -= array.length
|
28
|
+
end
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
|
32
|
+
def valid?(i)
|
33
|
+
chunks.each do |array|
|
34
|
+
return array.valid?(i) if i < array.length
|
35
|
+
i -= array.length
|
36
|
+
end
|
37
|
+
nil
|
38
|
+
end
|
39
|
+
|
19
40
|
def [](i)
|
20
|
-
|
21
|
-
array = get_chunk(j)
|
41
|
+
chunks.each do |array|
|
22
42
|
return array[i] if i < array.length
|
23
43
|
i -= array.length
|
24
44
|
end
|
@@ -28,16 +48,32 @@ module Arrow
|
|
28
48
|
def each(&block)
|
29
49
|
return to_enum(__method__) unless block_given?
|
30
50
|
|
31
|
-
|
51
|
+
chunks.each do |array|
|
32
52
|
array.each(&block)
|
33
53
|
end
|
34
54
|
end
|
35
55
|
|
36
|
-
def
|
56
|
+
def reverse_each(&block)
|
37
57
|
return to_enum(__method__) unless block_given?
|
38
58
|
|
39
|
-
|
40
|
-
|
59
|
+
chunks.reverse_each do |array|
|
60
|
+
array.reverse_each(&block)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def each_chunk(&block)
|
65
|
+
chunks.each(&block)
|
66
|
+
end
|
67
|
+
|
68
|
+
def pack
|
69
|
+
first_chunk = chunks.first
|
70
|
+
data_type = first_chunk.value_data_type
|
71
|
+
case data_type
|
72
|
+
when TimestampDataType
|
73
|
+
builder = TimestampArrayBuilder.new(data_type)
|
74
|
+
builder.build(to_a)
|
75
|
+
else
|
76
|
+
first_chunk.class.new(to_a)
|
41
77
|
end
|
42
78
|
end
|
43
79
|
end
|
data/lib/arrow/column.rb
CHANGED
@@ -16,6 +16,14 @@ module Arrow
|
|
16
16
|
class Column
|
17
17
|
include Enumerable
|
18
18
|
|
19
|
+
def null?(i)
|
20
|
+
data.null?(i)
|
21
|
+
end
|
22
|
+
|
23
|
+
def valid?(i)
|
24
|
+
data.valid?(i)
|
25
|
+
end
|
26
|
+
|
19
27
|
def [](i)
|
20
28
|
data[i]
|
21
29
|
end
|
@@ -25,5 +33,15 @@ module Arrow
|
|
25
33
|
|
26
34
|
data.each(&block)
|
27
35
|
end
|
36
|
+
|
37
|
+
def reverse_each(&block)
|
38
|
+
return to_enum(__method__) unless block_given?
|
39
|
+
|
40
|
+
data.reverse_each(&block)
|
41
|
+
end
|
42
|
+
|
43
|
+
def pack
|
44
|
+
self.class.new(field, data.pack)
|
45
|
+
end
|
28
46
|
end
|
29
47
|
end
|
data/lib/arrow/csv-loader.rb
CHANGED
@@ -172,6 +172,9 @@ module Arrow
|
|
172
172
|
candidate_type = :date_time
|
173
173
|
when Date
|
174
174
|
candidate_type = :date
|
175
|
+
when String
|
176
|
+
next if value.empty?
|
177
|
+
candidate_type = :string
|
175
178
|
else
|
176
179
|
candidate_type = :string
|
177
180
|
end
|
@@ -189,9 +192,21 @@ module Arrow
|
|
189
192
|
when :boolean
|
190
193
|
converters << selective_converter(i, &BOOLEAN_CONVERTER)
|
191
194
|
when :integer
|
192
|
-
converters << selective_converter(i
|
195
|
+
converters << selective_converter(i) do |field|
|
196
|
+
if field.nil? or field.empty?
|
197
|
+
nil
|
198
|
+
else
|
199
|
+
CSV::Converters[:integer].call(field)
|
200
|
+
end
|
201
|
+
end
|
193
202
|
when :float
|
194
|
-
converters << selective_converter(i
|
203
|
+
converters << selective_converter(i) do |field|
|
204
|
+
if field.nil? or field.empty?
|
205
|
+
nil
|
206
|
+
else
|
207
|
+
CSV::Converters[:float].call(field)
|
208
|
+
end
|
209
|
+
end
|
195
210
|
when :time
|
196
211
|
converters << selective_converter(i, &ISO8601_CONVERTER)
|
197
212
|
when :date_time
|
data/lib/arrow/data-type.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
|
16
|
+
class DataType
|
17
|
+
def numeric?
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class Int8DataType
|
23
|
+
def numeric?
|
24
|
+
true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Int16DataType
|
29
|
+
def numeric?
|
30
|
+
true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class Int32DataType
|
35
|
+
def numeric?
|
36
|
+
true
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
class Int64DataType
|
41
|
+
def numeric?
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
class UInt8DataType
|
47
|
+
def numeric?
|
48
|
+
true
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
class UInt16DataType
|
53
|
+
def numeric?
|
54
|
+
true
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class UInt32DataType
|
59
|
+
def numeric?
|
60
|
+
true
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class UInt64DataType
|
65
|
+
def numeric?
|
66
|
+
true
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class FloatDataType
|
71
|
+
def numeric?
|
72
|
+
true
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
class DoubleDataType
|
77
|
+
def numeric?
|
78
|
+
true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/arrow/group.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
|
16
|
+
class Group
|
17
|
+
def initialize(table, keys)
|
18
|
+
@table = table
|
19
|
+
@keys = keys
|
20
|
+
end
|
21
|
+
|
22
|
+
def count
|
23
|
+
key_names = @keys.collect(&:to_s)
|
24
|
+
target_columns = @table.columns.reject do |column|
|
25
|
+
key_names.include?(column.name)
|
26
|
+
end
|
27
|
+
aggregate(target_columns) do |column, indexes|
|
28
|
+
n = 0
|
29
|
+
indexes.each do |index|
|
30
|
+
n += 1 unless column.null?(index)
|
31
|
+
end
|
32
|
+
n
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def sum
|
37
|
+
key_names = @keys.collect(&:to_s)
|
38
|
+
target_columns = @table.columns.reject do |column|
|
39
|
+
key_names.include?(column.name) or
|
40
|
+
not column.data_type.numeric?
|
41
|
+
end
|
42
|
+
aggregate(target_columns) do |column, indexes|
|
43
|
+
n = 0
|
44
|
+
indexes.each do |index|
|
45
|
+
value = column[index]
|
46
|
+
n += value unless value.nil?
|
47
|
+
end
|
48
|
+
n
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def average
|
53
|
+
key_names = @keys.collect(&:to_s)
|
54
|
+
target_columns = @table.columns.reject do |column|
|
55
|
+
key_names.include?(column.name) or
|
56
|
+
not column.data_type.numeric?
|
57
|
+
end
|
58
|
+
aggregate(target_columns) do |column, indexes|
|
59
|
+
average = 0.0
|
60
|
+
n = 0
|
61
|
+
indexes.each do |index|
|
62
|
+
value = column[index]
|
63
|
+
unless value.nil?
|
64
|
+
n += 1
|
65
|
+
average += (value - average) / n
|
66
|
+
end
|
67
|
+
end
|
68
|
+
average
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
def aggregate(target_columns)
|
74
|
+
sort_values = @table.n_rows.times.collect do |i|
|
75
|
+
key_values = @keys.collect do |key|
|
76
|
+
@table[key][i]
|
77
|
+
end
|
78
|
+
[key_values, i]
|
79
|
+
end
|
80
|
+
sorted = sort_values.sort_by do |key_values, i|
|
81
|
+
key_values
|
82
|
+
end
|
83
|
+
|
84
|
+
grouped_keys = []
|
85
|
+
aggregated_arrays_raw = []
|
86
|
+
target_columns.size.times do
|
87
|
+
aggregated_arrays_raw << []
|
88
|
+
end
|
89
|
+
indexes = []
|
90
|
+
sorted.each do |key_values, i|
|
91
|
+
if grouped_keys.empty?
|
92
|
+
grouped_keys << key_values
|
93
|
+
indexes.clear
|
94
|
+
indexes << i
|
95
|
+
else
|
96
|
+
if key_values == grouped_keys.last
|
97
|
+
indexes << i
|
98
|
+
else
|
99
|
+
grouped_keys << key_values
|
100
|
+
target_columns.each_with_index do |column, j|
|
101
|
+
aggregated_arrays_raw[j] << yield(column, indexes)
|
102
|
+
end
|
103
|
+
indexes.clear
|
104
|
+
indexes << i
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
target_columns.each_with_index do |column, j|
|
109
|
+
aggregated_arrays_raw[j] << yield(column, indexes)
|
110
|
+
end
|
111
|
+
|
112
|
+
grouped_key_arrays_raw = grouped_keys.transpose
|
113
|
+
columns = @keys.collect.with_index do |key, i|
|
114
|
+
key_column = @table[key]
|
115
|
+
key_column_array_class = key_column.data.chunks.first.class
|
116
|
+
if key_column_array_class == TimestampArray
|
117
|
+
builder = TimestampArrayBuilder.new(key_column.data_type)
|
118
|
+
key_column_array = builder.build(grouped_key_arrays_raw[i])
|
119
|
+
else
|
120
|
+
key_column_array =
|
121
|
+
key_column_array_class.new(grouped_key_arrays_raw[i])
|
122
|
+
end
|
123
|
+
Column.new(key_column.field, key_column_array)
|
124
|
+
end
|
125
|
+
target_columns.each_with_index do |column, i|
|
126
|
+
array = ArrayBuilder.build(aggregated_arrays_raw[i])
|
127
|
+
field = Field.new(column.name, array.value_data_type)
|
128
|
+
columns << Column.new(field, array)
|
129
|
+
end
|
130
|
+
Table.new(columns)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|