red-arrow 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 59cce19c00fbc436f48f29c2d58aca2685330a63
4
- data.tar.gz: 280b1305e60c39d74d4fd101dc43387cd398e770
3
+ metadata.gz: d5c8c88ef78b27019b54dca4de8c914f17b80d9e
4
+ data.tar.gz: 85a527d72ea710c9f4c55e4c2db36110693a9f2c
5
5
  SHA512:
6
- metadata.gz: 3ecba86d0d061186def935c54a999f1c0adf1f109a1687eefd0375792a87f34333e7a46ee7f78fe51b01fbe294166491f300d9e46a8315e19ff47a1cb3f45102
7
- data.tar.gz: 49d09794b0d007bf9e65675e133c45cd1a90a3eec748b9c747d2e7d78e4c21d682ba503c696cb61d128fec362fa138ec50b04ab780a0bf554557417ccaf02afd
6
+ metadata.gz: 7b4635c2517a7b750c7f46341bd659504e4d2cbbba6f1d2ef4f32c47c1870de891a7bf009fcd4783284f72b515eaeb2e4846121bf0b66e8eeb017d9ab5f1727c
7
+ data.tar.gz: 21a10ffe298a91ec56b5354bcc6693348fdb815db04861fc04124849b111ffadc5ddbac2582cf6d87d8f96ec2060179fb2e19866f8f7be2699d547a2e2d861dc
data/doc/text/news.md CHANGED
@@ -1,5 +1,49 @@
1
1
  # News
2
2
 
3
+ ## 0.8.2 - 2018-02-04
4
+
5
+ ### Improvements
6
+
7
+ * `Arrow::Table#size`: Added.
8
+
9
+ * `Arrow::Table#length`: Added.
10
+
11
+ * `Arrow::Table#pack`: Added.
12
+
13
+ * `Arrow::Column#pack`: Added.
14
+
15
+ * `Arrow::ChunkedArray#pack`: Added.
16
+
17
+ * `Arrow::Column#reverse_each`: Added.
18
+
19
+ * `Arrow::Table#slice`: Added negative integer support.
20
+
21
+ * `Arrow::Slicer::ColumnCondition#in?`: Added.
22
+
23
+ * `Arrow::Table#group`: Added.
24
+
25
+ * `Arrow::ChunkedArray#null?`: Added.
26
+
27
+ * `Arrow::Column#null?`: Added.
28
+
29
+ * `Arrow::Group`: Added.
30
+
31
+ * `Arrow::CSVLoader`: Changed to treat `""` as a null value instead
32
+ of an empty string.
33
+
34
+ * `Arrow::Table#[]`: Stopped accepting multiple column names.
35
+
36
+ * `Arrow::ChunkedArray#valid?`: Added.
37
+
38
+ * `Arrow::Column#valid?`: Added.
39
+
40
+ * `Arrow::Slicer::ColumnCondition#valid?`: Added.
41
+
42
+ ### Fixes
43
+
44
+ * `Arrow::TableFormatter`: Fixed a bug where too many records were
45
+ formatted.
46
+
3
47
  ## 0.8.1 - 2018-01-05
4
48
 
5
49
  ### Improvements
@@ -28,7 +28,6 @@ module Arrow
28
28
  case value
29
29
  when nil
30
30
  # Ignore
31
- nil
32
31
  when true, false
33
32
  return BooleanArray.new(values)
34
33
  when String
data/lib/arrow/array.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -44,6 +44,14 @@ module Arrow
44
44
  end
45
45
  end
46
46
 
47
+ def reverse_each
48
+ return to_enum(__method__) unless block_given?
49
+
50
+ (length - 1).downto(0) do |i|
51
+ yield(self[i])
52
+ end
53
+ end
54
+
47
55
  def to_arrow
48
56
  self
49
57
  end
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,9 +16,29 @@ module Arrow
16
16
  class ChunkedArray
17
17
  include Enumerable
18
18
 
19
+ alias_method :chunks_raw, :chunks
20
+ def chunks
21
+ @chunks ||= chunks_raw
22
+ end
23
+
24
+ def null?(i)
25
+ chunks.each do |array|
26
+ return array.null?(i) if i < array.length
27
+ i -= array.length
28
+ end
29
+ nil
30
+ end
31
+
32
+ def valid?(i)
33
+ chunks.each do |array|
34
+ return array.valid?(i) if i < array.length
35
+ i -= array.length
36
+ end
37
+ nil
38
+ end
39
+
19
40
  def [](i)
20
- n_chunks.times do |j|
21
- array = get_chunk(j)
41
+ chunks.each do |array|
22
42
  return array[i] if i < array.length
23
43
  i -= array.length
24
44
  end
@@ -28,16 +48,32 @@ module Arrow
28
48
  def each(&block)
29
49
  return to_enum(__method__) unless block_given?
30
50
 
31
- each_chunk do |array|
51
+ chunks.each do |array|
32
52
  array.each(&block)
33
53
  end
34
54
  end
35
55
 
36
- def each_chunk
56
+ def reverse_each(&block)
37
57
  return to_enum(__method__) unless block_given?
38
58
 
39
- n_chunks.times do |i|
40
- yield(get_chunk(i))
59
+ chunks.reverse_each do |array|
60
+ array.reverse_each(&block)
61
+ end
62
+ end
63
+
64
+ def each_chunk(&block)
65
+ chunks.each(&block)
66
+ end
67
+
68
+ def pack
69
+ first_chunk = chunks.first
70
+ data_type = first_chunk.value_data_type
71
+ case data_type
72
+ when TimestampDataType
73
+ builder = TimestampArrayBuilder.new(data_type)
74
+ builder.build(to_a)
75
+ else
76
+ first_chunk.class.new(to_a)
41
77
  end
42
78
  end
43
79
  end
data/lib/arrow/column.rb CHANGED
@@ -16,6 +16,14 @@ module Arrow
16
16
  class Column
17
17
  include Enumerable
18
18
 
19
+ def null?(i)
20
+ data.null?(i)
21
+ end
22
+
23
+ def valid?(i)
24
+ data.valid?(i)
25
+ end
26
+
19
27
  def [](i)
20
28
  data[i]
21
29
  end
@@ -25,5 +33,15 @@ module Arrow
25
33
 
26
34
  data.each(&block)
27
35
  end
36
+
37
+ def reverse_each(&block)
38
+ return to_enum(__method__) unless block_given?
39
+
40
+ data.reverse_each(&block)
41
+ end
42
+
43
+ def pack
44
+ self.class.new(field, data.pack)
45
+ end
28
46
  end
29
47
  end
@@ -172,6 +172,9 @@ module Arrow
172
172
  candidate_type = :date_time
173
173
  when Date
174
174
  candidate_type = :date
175
+ when String
176
+ next if value.empty?
177
+ candidate_type = :string
175
178
  else
176
179
  candidate_type = :string
177
180
  end
@@ -189,9 +192,21 @@ module Arrow
189
192
  when :boolean
190
193
  converters << selective_converter(i, &BOOLEAN_CONVERTER)
191
194
  when :integer
192
- converters << selective_converter(i, &CSV::Converters[:integer])
195
+ converters << selective_converter(i) do |field|
196
+ if field.nil? or field.empty?
197
+ nil
198
+ else
199
+ CSV::Converters[:integer].call(field)
200
+ end
201
+ end
193
202
  when :float
194
- converters << selective_converter(i, &CSV::Converters[:float])
203
+ converters << selective_converter(i) do |field|
204
+ if field.nil? or field.empty?
205
+ nil
206
+ else
207
+ CSV::Converters[:float].call(field)
208
+ end
209
+ end
195
210
  when :time
196
211
  converters << selective_converter(i, &ISO8601_CONVERTER)
197
212
  when :date_time
@@ -0,0 +1,81 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Arrow
16
+ class DataType
17
+ def numeric?
18
+ false
19
+ end
20
+ end
21
+
22
+ class Int8DataType
23
+ def numeric?
24
+ true
25
+ end
26
+ end
27
+
28
+ class Int16DataType
29
+ def numeric?
30
+ true
31
+ end
32
+ end
33
+
34
+ class Int32DataType
35
+ def numeric?
36
+ true
37
+ end
38
+ end
39
+
40
+ class Int64DataType
41
+ def numeric?
42
+ true
43
+ end
44
+ end
45
+
46
+ class UInt8DataType
47
+ def numeric?
48
+ true
49
+ end
50
+ end
51
+
52
+ class UInt16DataType
53
+ def numeric?
54
+ true
55
+ end
56
+ end
57
+
58
+ class UInt32DataType
59
+ def numeric?
60
+ true
61
+ end
62
+ end
63
+
64
+ class UInt64DataType
65
+ def numeric?
66
+ true
67
+ end
68
+ end
69
+
70
+ class FloatDataType
71
+ def numeric?
72
+ true
73
+ end
74
+ end
75
+
76
+ class DoubleDataType
77
+ def numeric?
78
+ true
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,133 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Arrow
16
+ class Group
17
+ def initialize(table, keys)
18
+ @table = table
19
+ @keys = keys
20
+ end
21
+
22
+ def count
23
+ key_names = @keys.collect(&:to_s)
24
+ target_columns = @table.columns.reject do |column|
25
+ key_names.include?(column.name)
26
+ end
27
+ aggregate(target_columns) do |column, indexes|
28
+ n = 0
29
+ indexes.each do |index|
30
+ n += 1 unless column.null?(index)
31
+ end
32
+ n
33
+ end
34
+ end
35
+
36
+ def sum
37
+ key_names = @keys.collect(&:to_s)
38
+ target_columns = @table.columns.reject do |column|
39
+ key_names.include?(column.name) or
40
+ not column.data_type.numeric?
41
+ end
42
+ aggregate(target_columns) do |column, indexes|
43
+ n = 0
44
+ indexes.each do |index|
45
+ value = column[index]
46
+ n += value unless value.nil?
47
+ end
48
+ n
49
+ end
50
+ end
51
+
52
+ def average
53
+ key_names = @keys.collect(&:to_s)
54
+ target_columns = @table.columns.reject do |column|
55
+ key_names.include?(column.name) or
56
+ not column.data_type.numeric?
57
+ end
58
+ aggregate(target_columns) do |column, indexes|
59
+ average = 0.0
60
+ n = 0
61
+ indexes.each do |index|
62
+ value = column[index]
63
+ unless value.nil?
64
+ n += 1
65
+ average += (value - average) / n
66
+ end
67
+ end
68
+ average
69
+ end
70
+ end
71
+
72
+ private
73
+ def aggregate(target_columns)
74
+ sort_values = @table.n_rows.times.collect do |i|
75
+ key_values = @keys.collect do |key|
76
+ @table[key][i]
77
+ end
78
+ [key_values, i]
79
+ end
80
+ sorted = sort_values.sort_by do |key_values, i|
81
+ key_values
82
+ end
83
+
84
+ grouped_keys = []
85
+ aggregated_arrays_raw = []
86
+ target_columns.size.times do
87
+ aggregated_arrays_raw << []
88
+ end
89
+ indexes = []
90
+ sorted.each do |key_values, i|
91
+ if grouped_keys.empty?
92
+ grouped_keys << key_values
93
+ indexes.clear
94
+ indexes << i
95
+ else
96
+ if key_values == grouped_keys.last
97
+ indexes << i
98
+ else
99
+ grouped_keys << key_values
100
+ target_columns.each_with_index do |column, j|
101
+ aggregated_arrays_raw[j] << yield(column, indexes)
102
+ end
103
+ indexes.clear
104
+ indexes << i
105
+ end
106
+ end
107
+ end
108
+ target_columns.each_with_index do |column, j|
109
+ aggregated_arrays_raw[j] << yield(column, indexes)
110
+ end
111
+
112
+ grouped_key_arrays_raw = grouped_keys.transpose
113
+ columns = @keys.collect.with_index do |key, i|
114
+ key_column = @table[key]
115
+ key_column_array_class = key_column.data.chunks.first.class
116
+ if key_column_array_class == TimestampArray
117
+ builder = TimestampArrayBuilder.new(key_column.data_type)
118
+ key_column_array = builder.build(grouped_key_arrays_raw[i])
119
+ else
120
+ key_column_array =
121
+ key_column_array_class.new(grouped_key_arrays_raw[i])
122
+ end
123
+ Column.new(key_column.field, key_column_array)
124
+ end
125
+ target_columns.each_with_index do |column, i|
126
+ array = ArrayBuilder.build(aggregated_arrays_raw[i])
127
+ field = Field.new(column.name, array.value_data_type)
128
+ columns << Column.new(field, array)
129
+ end
130
+ Table.new(columns)
131
+ end
132
+ end
133
+ end