red-arrow 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +44 -0
- data/lib/arrow/array-builder.rb +0 -1
- data/lib/arrow/array.rb +9 -1
- data/lib/arrow/chunked-array.rb +43 -7
- data/lib/arrow/column.rb +18 -0
- data/lib/arrow/csv-loader.rb +17 -2
- data/lib/arrow/data-type.rb +81 -0
- data/lib/arrow/group.rb +133 -0
- data/lib/arrow/loader.rb +4 -1
- data/lib/arrow/slicer.rb +76 -19
- data/lib/arrow/table-formatter.rb +21 -47
- data/lib/arrow/table-list-formatter.rb +35 -0
- data/lib/arrow/table-table-formatter.rb +69 -0
- data/lib/arrow/table.rb +62 -40
- data/lib/arrow/version.rb +1 -1
- data/test/fixture/null-with-double-quote.csv +4 -0
- data/test/fixture/null-without-double-quote.csv +4 -0
- data/test/run-test.rb +3 -1
- data/test/test-chunked-array.rb +39 -2
- data/test/test-column.rb +13 -0
- data/test/test-csv-loader.rb +20 -0
- data/test/test-group.rb +111 -0
- data/test/test-slicer.rb +42 -0
- data/test/test-table.rb +52 -10
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5c8c88ef78b27019b54dca4de8c914f17b80d9e
|
4
|
+
data.tar.gz: 85a527d72ea710c9f4c55e4c2db36110693a9f2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b4635c2517a7b750c7f46341bd659504e4d2cbbba6f1d2ef4f32c47c1870de891a7bf009fcd4783284f72b515eaeb2e4846121bf0b66e8eeb017d9ab5f1727c
|
7
|
+
data.tar.gz: 21a10ffe298a91ec56b5354bcc6693348fdb815db04861fc04124849b111ffadc5ddbac2582cf6d87d8f96ec2060179fb2e19866f8f7be2699d547a2e2d861dc
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,49 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.8.2 - 2018-02-04
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Arrow::Table#size`: Added.
|
8
|
+
|
9
|
+
* `Arrow::Table#length`: Added.
|
10
|
+
|
11
|
+
* `Arrow::Table#pack`: Added.
|
12
|
+
|
13
|
+
* `Arrow::Column#pack`: Added.
|
14
|
+
|
15
|
+
* `Arrow::ChunkedArray#pack`: Added.
|
16
|
+
|
17
|
+
* `Arrow::Column#reverse_each`: Added.
|
18
|
+
|
19
|
+
* `Arrow::Table#slice`: Added negative integer support.
|
20
|
+
|
21
|
+
* `Arrow::Slicer::ColumnCondition#in?`: Added.
|
22
|
+
|
23
|
+
* `Arrow::Table#group`: Added.
|
24
|
+
|
25
|
+
* `Arrow::ChunkedArray#null?`: Added.
|
26
|
+
|
27
|
+
* `Arrow::Column#null?`: Added.
|
28
|
+
|
29
|
+
* `Arrow::Group`: Added.
|
30
|
+
|
31
|
+
* `Arrow::CSVLoader`: Changed to treat `""` as a null value instead
|
32
|
+
of an empty string.
|
33
|
+
|
34
|
+
* `Arrow::Table#[]`: Stopped accepting multiple column names.
|
35
|
+
|
36
|
+
* `Arrow::ChunkedArray#valid?`: Added.
|
37
|
+
|
38
|
+
* `Arrow::Column#valid?`: Added.
|
39
|
+
|
40
|
+
* `Arrow::Slicer::ColumnCondition#valid?`: Added.
|
41
|
+
|
42
|
+
### Fixes
|
43
|
+
|
44
|
+
* `Arrow::TableFormatter`: Fixed a bug where too many records were
|
45
|
+
formatted.
|
46
|
+
|
3
47
|
## 0.8.1 - 2018-01-05
|
4
48
|
|
5
49
|
### Improvements
|
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -44,6 +44,14 @@ module Arrow
|
|
44
44
|
end
|
45
45
|
end
|
46
46
|
|
47
|
+
def reverse_each
|
48
|
+
return to_enum(__method__) unless block_given?
|
49
|
+
|
50
|
+
(length - 1).downto(0) do |i|
|
51
|
+
yield(self[i])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
47
55
|
def to_arrow
|
48
56
|
self
|
49
57
|
end
|
data/lib/arrow/chunked-array.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2017 Kouhei Sutou <kou@clear-code.com>
|
1
|
+
# Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -16,9 +16,29 @@ module Arrow
|
|
16
16
|
class ChunkedArray
|
17
17
|
include Enumerable
|
18
18
|
|
19
|
+
alias_method :chunks_raw, :chunks
|
20
|
+
def chunks
|
21
|
+
@chunks ||= chunks_raw
|
22
|
+
end
|
23
|
+
|
24
|
+
def null?(i)
|
25
|
+
chunks.each do |array|
|
26
|
+
return array.null?(i) if i < array.length
|
27
|
+
i -= array.length
|
28
|
+
end
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
|
32
|
+
def valid?(i)
|
33
|
+
chunks.each do |array|
|
34
|
+
return array.valid?(i) if i < array.length
|
35
|
+
i -= array.length
|
36
|
+
end
|
37
|
+
nil
|
38
|
+
end
|
39
|
+
|
19
40
|
def [](i)
|
20
|
-
|
21
|
-
array = get_chunk(j)
|
41
|
+
chunks.each do |array|
|
22
42
|
return array[i] if i < array.length
|
23
43
|
i -= array.length
|
24
44
|
end
|
@@ -28,16 +48,32 @@ module Arrow
|
|
28
48
|
def each(&block)
|
29
49
|
return to_enum(__method__) unless block_given?
|
30
50
|
|
31
|
-
|
51
|
+
chunks.each do |array|
|
32
52
|
array.each(&block)
|
33
53
|
end
|
34
54
|
end
|
35
55
|
|
36
|
-
def
|
56
|
+
def reverse_each(&block)
|
37
57
|
return to_enum(__method__) unless block_given?
|
38
58
|
|
39
|
-
|
40
|
-
|
59
|
+
chunks.reverse_each do |array|
|
60
|
+
array.reverse_each(&block)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def each_chunk(&block)
|
65
|
+
chunks.each(&block)
|
66
|
+
end
|
67
|
+
|
68
|
+
def pack
|
69
|
+
first_chunk = chunks.first
|
70
|
+
data_type = first_chunk.value_data_type
|
71
|
+
case data_type
|
72
|
+
when TimestampDataType
|
73
|
+
builder = TimestampArrayBuilder.new(data_type)
|
74
|
+
builder.build(to_a)
|
75
|
+
else
|
76
|
+
first_chunk.class.new(to_a)
|
41
77
|
end
|
42
78
|
end
|
43
79
|
end
|
data/lib/arrow/column.rb
CHANGED
@@ -16,6 +16,14 @@ module Arrow
|
|
16
16
|
class Column
|
17
17
|
include Enumerable
|
18
18
|
|
19
|
+
def null?(i)
|
20
|
+
data.null?(i)
|
21
|
+
end
|
22
|
+
|
23
|
+
def valid?(i)
|
24
|
+
data.valid?(i)
|
25
|
+
end
|
26
|
+
|
19
27
|
def [](i)
|
20
28
|
data[i]
|
21
29
|
end
|
@@ -25,5 +33,15 @@ module Arrow
|
|
25
33
|
|
26
34
|
data.each(&block)
|
27
35
|
end
|
36
|
+
|
37
|
+
def reverse_each(&block)
|
38
|
+
return to_enum(__method__) unless block_given?
|
39
|
+
|
40
|
+
data.reverse_each(&block)
|
41
|
+
end
|
42
|
+
|
43
|
+
def pack
|
44
|
+
self.class.new(field, data.pack)
|
45
|
+
end
|
28
46
|
end
|
29
47
|
end
|
data/lib/arrow/csv-loader.rb
CHANGED
@@ -172,6 +172,9 @@ module Arrow
|
|
172
172
|
candidate_type = :date_time
|
173
173
|
when Date
|
174
174
|
candidate_type = :date
|
175
|
+
when String
|
176
|
+
next if value.empty?
|
177
|
+
candidate_type = :string
|
175
178
|
else
|
176
179
|
candidate_type = :string
|
177
180
|
end
|
@@ -189,9 +192,21 @@ module Arrow
|
|
189
192
|
when :boolean
|
190
193
|
converters << selective_converter(i, &BOOLEAN_CONVERTER)
|
191
194
|
when :integer
|
192
|
-
converters << selective_converter(i
|
195
|
+
converters << selective_converter(i) do |field|
|
196
|
+
if field.nil? or field.empty?
|
197
|
+
nil
|
198
|
+
else
|
199
|
+
CSV::Converters[:integer].call(field)
|
200
|
+
end
|
201
|
+
end
|
193
202
|
when :float
|
194
|
-
converters << selective_converter(i
|
203
|
+
converters << selective_converter(i) do |field|
|
204
|
+
if field.nil? or field.empty?
|
205
|
+
nil
|
206
|
+
else
|
207
|
+
CSV::Converters[:float].call(field)
|
208
|
+
end
|
209
|
+
end
|
195
210
|
when :time
|
196
211
|
converters << selective_converter(i, &ISO8601_CONVERTER)
|
197
212
|
when :date_time
|
data/lib/arrow/data-type.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
|
16
|
+
class DataType
|
17
|
+
def numeric?
|
18
|
+
false
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class Int8DataType
|
23
|
+
def numeric?
|
24
|
+
true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
class Int16DataType
|
29
|
+
def numeric?
|
30
|
+
true
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
class Int32DataType
|
35
|
+
def numeric?
|
36
|
+
true
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
class Int64DataType
|
41
|
+
def numeric?
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
class UInt8DataType
|
47
|
+
def numeric?
|
48
|
+
true
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
class UInt16DataType
|
53
|
+
def numeric?
|
54
|
+
true
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
class UInt32DataType
|
59
|
+
def numeric?
|
60
|
+
true
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class UInt64DataType
|
65
|
+
def numeric?
|
66
|
+
true
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
class FloatDataType
|
71
|
+
def numeric?
|
72
|
+
true
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
class DoubleDataType
|
77
|
+
def numeric?
|
78
|
+
true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/arrow/group.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
# Copyright 2018 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module Arrow
|
16
|
+
class Group
|
17
|
+
def initialize(table, keys)
|
18
|
+
@table = table
|
19
|
+
@keys = keys
|
20
|
+
end
|
21
|
+
|
22
|
+
def count
|
23
|
+
key_names = @keys.collect(&:to_s)
|
24
|
+
target_columns = @table.columns.reject do |column|
|
25
|
+
key_names.include?(column.name)
|
26
|
+
end
|
27
|
+
aggregate(target_columns) do |column, indexes|
|
28
|
+
n = 0
|
29
|
+
indexes.each do |index|
|
30
|
+
n += 1 unless column.null?(index)
|
31
|
+
end
|
32
|
+
n
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def sum
|
37
|
+
key_names = @keys.collect(&:to_s)
|
38
|
+
target_columns = @table.columns.reject do |column|
|
39
|
+
key_names.include?(column.name) or
|
40
|
+
not column.data_type.numeric?
|
41
|
+
end
|
42
|
+
aggregate(target_columns) do |column, indexes|
|
43
|
+
n = 0
|
44
|
+
indexes.each do |index|
|
45
|
+
value = column[index]
|
46
|
+
n += value unless value.nil?
|
47
|
+
end
|
48
|
+
n
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def average
|
53
|
+
key_names = @keys.collect(&:to_s)
|
54
|
+
target_columns = @table.columns.reject do |column|
|
55
|
+
key_names.include?(column.name) or
|
56
|
+
not column.data_type.numeric?
|
57
|
+
end
|
58
|
+
aggregate(target_columns) do |column, indexes|
|
59
|
+
average = 0.0
|
60
|
+
n = 0
|
61
|
+
indexes.each do |index|
|
62
|
+
value = column[index]
|
63
|
+
unless value.nil?
|
64
|
+
n += 1
|
65
|
+
average += (value - average) / n
|
66
|
+
end
|
67
|
+
end
|
68
|
+
average
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
def aggregate(target_columns)
|
74
|
+
sort_values = @table.n_rows.times.collect do |i|
|
75
|
+
key_values = @keys.collect do |key|
|
76
|
+
@table[key][i]
|
77
|
+
end
|
78
|
+
[key_values, i]
|
79
|
+
end
|
80
|
+
sorted = sort_values.sort_by do |key_values, i|
|
81
|
+
key_values
|
82
|
+
end
|
83
|
+
|
84
|
+
grouped_keys = []
|
85
|
+
aggregated_arrays_raw = []
|
86
|
+
target_columns.size.times do
|
87
|
+
aggregated_arrays_raw << []
|
88
|
+
end
|
89
|
+
indexes = []
|
90
|
+
sorted.each do |key_values, i|
|
91
|
+
if grouped_keys.empty?
|
92
|
+
grouped_keys << key_values
|
93
|
+
indexes.clear
|
94
|
+
indexes << i
|
95
|
+
else
|
96
|
+
if key_values == grouped_keys.last
|
97
|
+
indexes << i
|
98
|
+
else
|
99
|
+
grouped_keys << key_values
|
100
|
+
target_columns.each_with_index do |column, j|
|
101
|
+
aggregated_arrays_raw[j] << yield(column, indexes)
|
102
|
+
end
|
103
|
+
indexes.clear
|
104
|
+
indexes << i
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
target_columns.each_with_index do |column, j|
|
109
|
+
aggregated_arrays_raw[j] << yield(column, indexes)
|
110
|
+
end
|
111
|
+
|
112
|
+
grouped_key_arrays_raw = grouped_keys.transpose
|
113
|
+
columns = @keys.collect.with_index do |key, i|
|
114
|
+
key_column = @table[key]
|
115
|
+
key_column_array_class = key_column.data.chunks.first.class
|
116
|
+
if key_column_array_class == TimestampArray
|
117
|
+
builder = TimestampArrayBuilder.new(key_column.data_type)
|
118
|
+
key_column_array = builder.build(grouped_key_arrays_raw[i])
|
119
|
+
else
|
120
|
+
key_column_array =
|
121
|
+
key_column_array_class.new(grouped_key_arrays_raw[i])
|
122
|
+
end
|
123
|
+
Column.new(key_column.field, key_column_array)
|
124
|
+
end
|
125
|
+
target_columns.each_with_index do |column, i|
|
126
|
+
array = ArrayBuilder.build(aggregated_arrays_raw[i])
|
127
|
+
field = Field.new(column.name, array.value_data_type)
|
128
|
+
columns << Column.new(field, array)
|
129
|
+
end
|
130
|
+
Table.new(columns)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
end
|