red-arrow 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 59cce19c00fbc436f48f29c2d58aca2685330a63
4
- data.tar.gz: 280b1305e60c39d74d4fd101dc43387cd398e770
3
+ metadata.gz: d5c8c88ef78b27019b54dca4de8c914f17b80d9e
4
+ data.tar.gz: 85a527d72ea710c9f4c55e4c2db36110693a9f2c
5
5
  SHA512:
6
- metadata.gz: 3ecba86d0d061186def935c54a999f1c0adf1f109a1687eefd0375792a87f34333e7a46ee7f78fe51b01fbe294166491f300d9e46a8315e19ff47a1cb3f45102
7
- data.tar.gz: 49d09794b0d007bf9e65675e133c45cd1a90a3eec748b9c747d2e7d78e4c21d682ba503c696cb61d128fec362fa138ec50b04ab780a0bf554557417ccaf02afd
6
+ metadata.gz: 7b4635c2517a7b750c7f46341bd659504e4d2cbbba6f1d2ef4f32c47c1870de891a7bf009fcd4783284f72b515eaeb2e4846121bf0b66e8eeb017d9ab5f1727c
7
+ data.tar.gz: 21a10ffe298a91ec56b5354bcc6693348fdb815db04861fc04124849b111ffadc5ddbac2582cf6d87d8f96ec2060179fb2e19866f8f7be2699d547a2e2d861dc
data/doc/text/news.md CHANGED
@@ -1,5 +1,49 @@
1
1
  # News
2
2
 
3
+ ## 0.8.2 - 2018-02-04
4
+
5
+ ### Improvements
6
+
7
+ * `Arrow::Table#size`: Added.
8
+
9
+ * `Arrow::Table#length`: Added.
10
+
11
+ * `Arrow::Table#pack`: Added.
12
+
13
+ * `Arrow::Column#pack`: Added.
14
+
15
+ * `Arrow::ChunkedArray#pack`: Added.
16
+
17
+ * `Arrow::Column#reverse_each`: Added.
18
+
19
+ * `Arrow::Table#slice`: Added negative integer support.
20
+
21
+ * `Arrow::Slicer::ColumnCondition#in?`: Added.
22
+
23
+ * `Arrow::Table#group`: Added.
24
+
25
+ * `Arrow::ChunkedArray#null?`: Added.
26
+
27
+ * `Arrow::Column#null?`: Added.
28
+
29
+ * `Arrow::Group`: Added.
30
+
31
+ * `Arrow::CSVLoader`: Changed to treat `""` as a null value instead
32
+ of an empty string.
33
+
34
+ * `Arrow::Table#[]`: Stopped accepting multiple column names.
35
+
36
+ * `Arrow::ChunkedArray#valid?`: Added.
37
+
38
+ * `Arrow::Column#valid?`: Added.
39
+
40
+ * `Arrow::Slicer::ColumnCondition#valid?`: Added.
41
+
42
+ ### Fixes
43
+
44
+ * `Arrow::TableFormatter`: Fixed a bug where too many records are
45
+ formatted.
46
+
3
47
  ## 0.8.1 - 2018-01-05
4
48
 
5
49
  ### Improvements
@@ -28,7 +28,6 @@ module Arrow
28
28
  case value
29
29
  when nil
30
30
  # Ignore
31
- nil
32
31
  when true, false
33
32
  return BooleanArray.new(values)
34
33
  when String
data/lib/arrow/array.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -44,6 +44,14 @@ module Arrow
44
44
  end
45
45
  end
46
46
 
47
+ def reverse_each
48
+ return to_enum(__method__) unless block_given?
49
+
50
+ (length - 1).downto(0) do |i|
51
+ yield(self[i])
52
+ end
53
+ end
54
+
47
55
  def to_arrow
48
56
  self
49
57
  end
@@ -1,4 +1,4 @@
1
- # Copyright 2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright 2017-2018 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,9 +16,29 @@ module Arrow
16
16
  class ChunkedArray
17
17
  include Enumerable
18
18
 
19
+ alias_method :chunks_raw, :chunks
20
+ def chunks
21
+ @chunks ||= chunks_raw
22
+ end
23
+
24
+ def null?(i)
25
+ chunks.each do |array|
26
+ return array.null?(i) if i < array.length
27
+ i -= array.length
28
+ end
29
+ nil
30
+ end
31
+
32
+ def valid?(i)
33
+ chunks.each do |array|
34
+ return array.valid?(i) if i < array.length
35
+ i -= array.length
36
+ end
37
+ nil
38
+ end
39
+
19
40
  def [](i)
20
- n_chunks.times do |j|
21
- array = get_chunk(j)
41
+ chunks.each do |array|
22
42
  return array[i] if i < array.length
23
43
  i -= array.length
24
44
  end
@@ -28,16 +48,32 @@ module Arrow
28
48
  def each(&block)
29
49
  return to_enum(__method__) unless block_given?
30
50
 
31
- each_chunk do |array|
51
+ chunks.each do |array|
32
52
  array.each(&block)
33
53
  end
34
54
  end
35
55
 
36
- def each_chunk
56
+ def reverse_each(&block)
37
57
  return to_enum(__method__) unless block_given?
38
58
 
39
- n_chunks.times do |i|
40
- yield(get_chunk(i))
59
+ chunks.reverse_each do |array|
60
+ array.reverse_each(&block)
61
+ end
62
+ end
63
+
64
+ def each_chunk(&block)
65
+ chunks.each(&block)
66
+ end
67
+
68
+ def pack
69
+ first_chunk = chunks.first
70
+ data_type = first_chunk.value_data_type
71
+ case data_type
72
+ when TimestampDataType
73
+ builder = TimestampArrayBuilder.new(data_type)
74
+ builder.build(to_a)
75
+ else
76
+ first_chunk.class.new(to_a)
41
77
  end
42
78
  end
43
79
  end
data/lib/arrow/column.rb CHANGED
@@ -16,6 +16,14 @@ module Arrow
16
16
  class Column
17
17
  include Enumerable
18
18
 
19
+ def null?(i)
20
+ data.null?(i)
21
+ end
22
+
23
+ def valid?(i)
24
+ data.valid?(i)
25
+ end
26
+
19
27
  def [](i)
20
28
  data[i]
21
29
  end
@@ -25,5 +33,15 @@ module Arrow
25
33
 
26
34
  data.each(&block)
27
35
  end
36
+
37
+ def reverse_each(&block)
38
+ return to_enum(__method__) unless block_given?
39
+
40
+ data.reverse_each(&block)
41
+ end
42
+
43
+ def pack
44
+ self.class.new(field, data.pack)
45
+ end
28
46
  end
29
47
  end
@@ -172,6 +172,9 @@ module Arrow
172
172
  candidate_type = :date_time
173
173
  when Date
174
174
  candidate_type = :date
175
+ when String
176
+ next if value.empty?
177
+ candidate_type = :string
175
178
  else
176
179
  candidate_type = :string
177
180
  end
@@ -189,9 +192,21 @@ module Arrow
189
192
  when :boolean
190
193
  converters << selective_converter(i, &BOOLEAN_CONVERTER)
191
194
  when :integer
192
- converters << selective_converter(i, &CSV::Converters[:integer])
195
+ converters << selective_converter(i) do |field|
196
+ if field.nil? or field.empty?
197
+ nil
198
+ else
199
+ CSV::Converters[:integer].call(field)
200
+ end
201
+ end
193
202
  when :float
194
- converters << selective_converter(i, &CSV::Converters[:float])
203
+ converters << selective_converter(i) do |field|
204
+ if field.nil? or field.empty?
205
+ nil
206
+ else
207
+ CSV::Converters[:float].call(field)
208
+ end
209
+ end
195
210
  when :time
196
211
  converters << selective_converter(i, &ISO8601_CONVERTER)
197
212
  when :date_time
@@ -0,0 +1,81 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Arrow
16
+ class DataType
17
+ def numeric?
18
+ false
19
+ end
20
+ end
21
+
22
+ class Int8DataType
23
+ def numeric?
24
+ true
25
+ end
26
+ end
27
+
28
+ class Int16DataType
29
+ def numeric?
30
+ true
31
+ end
32
+ end
33
+
34
+ class Int32DataType
35
+ def numeric?
36
+ true
37
+ end
38
+ end
39
+
40
+ class Int64DataType
41
+ def numeric?
42
+ true
43
+ end
44
+ end
45
+
46
+ class UInt8DataType
47
+ def numeric?
48
+ true
49
+ end
50
+ end
51
+
52
+ class UInt16DataType
53
+ def numeric?
54
+ true
55
+ end
56
+ end
57
+
58
+ class UInt32DataType
59
+ def numeric?
60
+ true
61
+ end
62
+ end
63
+
64
+ class UInt64DataType
65
+ def numeric?
66
+ true
67
+ end
68
+ end
69
+
70
+ class FloatDataType
71
+ def numeric?
72
+ true
73
+ end
74
+ end
75
+
76
+ class DoubleDataType
77
+ def numeric?
78
+ true
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,133 @@
1
+ # Copyright 2018 Kouhei Sutou <kou@clear-code.com>
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Arrow
16
+ class Group
17
+ def initialize(table, keys)
18
+ @table = table
19
+ @keys = keys
20
+ end
21
+
22
+ def count
23
+ key_names = @keys.collect(&:to_s)
24
+ target_columns = @table.columns.reject do |column|
25
+ key_names.include?(column.name)
26
+ end
27
+ aggregate(target_columns) do |column, indexes|
28
+ n = 0
29
+ indexes.each do |index|
30
+ n += 1 unless column.null?(index)
31
+ end
32
+ n
33
+ end
34
+ end
35
+
36
+ def sum
37
+ key_names = @keys.collect(&:to_s)
38
+ target_columns = @table.columns.reject do |column|
39
+ key_names.include?(column.name) or
40
+ not column.data_type.numeric?
41
+ end
42
+ aggregate(target_columns) do |column, indexes|
43
+ n = 0
44
+ indexes.each do |index|
45
+ value = column[index]
46
+ n += value unless value.nil?
47
+ end
48
+ n
49
+ end
50
+ end
51
+
52
+ def average
53
+ key_names = @keys.collect(&:to_s)
54
+ target_columns = @table.columns.reject do |column|
55
+ key_names.include?(column.name) or
56
+ not column.data_type.numeric?
57
+ end
58
+ aggregate(target_columns) do |column, indexes|
59
+ average = 0.0
60
+ n = 0
61
+ indexes.each do |index|
62
+ value = column[index]
63
+ unless value.nil?
64
+ n += 1
65
+ average += (value - average) / n
66
+ end
67
+ end
68
+ average
69
+ end
70
+ end
71
+
72
+ private
73
+ def aggregate(target_columns)
74
+ sort_values = @table.n_rows.times.collect do |i|
75
+ key_values = @keys.collect do |key|
76
+ @table[key][i]
77
+ end
78
+ [key_values, i]
79
+ end
80
+ sorted = sort_values.sort_by do |key_values, i|
81
+ key_values
82
+ end
83
+
84
+ grouped_keys = []
85
+ aggregated_arrays_raw = []
86
+ target_columns.size.times do
87
+ aggregated_arrays_raw << []
88
+ end
89
+ indexes = []
90
+ sorted.each do |key_values, i|
91
+ if grouped_keys.empty?
92
+ grouped_keys << key_values
93
+ indexes.clear
94
+ indexes << i
95
+ else
96
+ if key_values == grouped_keys.last
97
+ indexes << i
98
+ else
99
+ grouped_keys << key_values
100
+ target_columns.each_with_index do |column, j|
101
+ aggregated_arrays_raw[j] << yield(column, indexes)
102
+ end
103
+ indexes.clear
104
+ indexes << i
105
+ end
106
+ end
107
+ end
108
+ target_columns.each_with_index do |column, j|
109
+ aggregated_arrays_raw[j] << yield(column, indexes)
110
+ end
111
+
112
+ grouped_key_arrays_raw = grouped_keys.transpose
113
+ columns = @keys.collect.with_index do |key, i|
114
+ key_column = @table[key]
115
+ key_column_array_class = key_column.data.chunks.first.class
116
+ if key_column_array_class == TimestampArray
117
+ builder = TimestampArrayBuilder.new(key_column.data_type)
118
+ key_column_array = builder.build(grouped_key_arrays_raw[i])
119
+ else
120
+ key_column_array =
121
+ key_column_array_class.new(grouped_key_arrays_raw[i])
122
+ end
123
+ Column.new(key_column.field, key_column_array)
124
+ end
125
+ target_columns.each_with_index do |column, i|
126
+ array = ArrayBuilder.build(aggregated_arrays_raw[i])
127
+ field = Field.new(column.name, array.value_data_type)
128
+ columns << Column.new(field, array)
129
+ end
130
+ Table.new(columns)
131
+ end
132
+ end
133
+ end