red-arrow 3.0.0 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/arrow.cpp +3 -0
  4. data/ext/arrow/converters.cpp +5 -0
  5. data/ext/arrow/converters.hpp +126 -0
  6. data/ext/arrow/extconf.rb +13 -0
  7. data/ext/arrow/memory-view.cpp +311 -0
  8. data/ext/arrow/memory-view.hpp +26 -0
  9. data/ext/arrow/raw-records.cpp +1 -0
  10. data/ext/arrow/values.cpp +1 -0
  11. data/lib/arrow/aggregate-node-options.rb +35 -0
  12. data/lib/arrow/aggregation.rb +46 -0
  13. data/lib/arrow/array-builder.rb +5 -0
  14. data/lib/arrow/array.rb +130 -0
  15. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  16. data/lib/arrow/buffer.rb +10 -6
  17. data/lib/arrow/column-containable.rb +100 -1
  18. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  19. data/lib/arrow/data-type.rb +14 -5
  20. data/lib/arrow/datum.rb +100 -0
  21. data/lib/arrow/dense-union-data-type.rb +2 -2
  22. data/lib/arrow/dictionary-data-type.rb +2 -2
  23. data/lib/arrow/equal-options.rb +38 -0
  24. data/lib/arrow/expression.rb +48 -0
  25. data/lib/arrow/file-system.rb +34 -0
  26. data/lib/arrow/group.rb +116 -124
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/map-array-builder.rb +109 -0
  29. data/lib/arrow/map-array.rb +26 -0
  30. data/lib/arrow/map-data-type.rb +89 -0
  31. data/lib/arrow/path-extension.rb +1 -1
  32. data/lib/arrow/record-batch-reader.rb +41 -0
  33. data/lib/arrow/record-batch.rb +0 -2
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +193 -0
  37. data/lib/arrow/sort-options.rb +109 -0
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/sparse-union-data-type.rb +2 -2
  40. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  41. data/lib/arrow/symbol-values-appendable.rb +34 -0
  42. data/lib/arrow/table-concatenate-options.rb +36 -0
  43. data/lib/arrow/table-formatter.rb +141 -17
  44. data/lib/arrow/table-list-formatter.rb +5 -3
  45. data/lib/arrow/table-loader.rb +41 -3
  46. data/lib/arrow/table-saver.rb +29 -3
  47. data/lib/arrow/table-table-formatter.rb +7 -31
  48. data/lib/arrow/table.rb +34 -40
  49. data/lib/arrow/time32-data-type.rb +2 -2
  50. data/lib/arrow/time64-data-type.rb +2 -2
  51. data/lib/arrow/timestamp-data-type.rb +2 -2
  52. data/lib/arrow/version.rb +1 -1
  53. data/red-arrow.gemspec +2 -1
  54. data/test/helper.rb +1 -0
  55. data/test/raw-records/test-dense-union-array.rb +14 -0
  56. data/test/raw-records/test-list-array.rb +19 -0
  57. data/test/raw-records/test-map-array.rb +441 -0
  58. data/test/raw-records/test-sparse-union-array.rb +14 -0
  59. data/test/raw-records/test-struct-array.rb +15 -0
  60. data/test/test-array-builder.rb +7 -0
  61. data/test/test-array.rb +154 -0
  62. data/test/test-binary-dictionary-array-builder.rb +103 -0
  63. data/test/test-boolean-scalar.rb +26 -0
  64. data/test/test-csv-loader.rb +8 -8
  65. data/test/test-decimal128-data-type.rb +2 -2
  66. data/test/test-expression.rb +40 -0
  67. data/test/test-float-scalar.rb +46 -0
  68. data/test/test-function.rb +176 -0
  69. data/test/test-group.rb +75 -51
  70. data/test/test-map-array-builder.rb +110 -0
  71. data/test/test-map-array.rb +33 -0
  72. data/test/test-map-data-type.rb +36 -0
  73. data/test/test-memory-view.rb +434 -0
  74. data/test/test-orc.rb +19 -23
  75. data/test/test-record-batch-reader.rb +46 -0
  76. data/test/test-record-batch.rb +42 -0
  77. data/test/test-slicer.rb +166 -167
  78. data/test/test-sort-indices.rb +40 -0
  79. data/test/test-sort-key.rb +81 -0
  80. data/test/test-sort-options.rb +58 -0
  81. data/test/test-string-dictionary-array-builder.rb +103 -0
  82. data/test/test-table.rb +190 -53
  83. data/test/values/test-dense-union-array.rb +14 -0
  84. data/test/values/test-list-array.rb +17 -0
  85. data/test/values/test-map-array.rb +433 -0
  86. data/test/values/test-sparse-union-array.rb +14 -0
  87. data/test/values/test-struct-array.rb +15 -0
  88. metadata +73 -6
@@ -0,0 +1,32 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class SourceNodeOptions
    # @api private
    #
    # Wraps a supported source object in a new options instance.
    #
    # @param value [Object] The conversion candidate.
    # @return [Arrow::SourceNodeOptions, nil] A new instance built from
    #   `value` when it is a `RecordBatchReader`, `RecordBatch` or
    #   `Table`; `nil` for anything else.
    def self.try_convert(value)
      # A `case` without `else` evaluates to nil when nothing matches.
      case value
      when RecordBatchReader, RecordBatch, Table then new(value)
      end
    end
  end
end
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicate
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicate corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
@@ -0,0 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class StringDictionaryArrayBuilder
    include SymbolValuesAppendable

    # Hook called by `SymbolValuesAppendable#append_values` to obtain
    # the builder that collects the values.
    #
    # @return [Arrow::StringArrayBuilder] A fresh string array builder.
    private def create_values_array_builder
      StringArrayBuilder.new
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mixin for builders that accept Symbols where strings are expected.
  #
  # The including class must provide `create_values_array_builder`
  # and `append_array`.
  module SymbolValuesAppendable
    # Appends `values` after converting every Symbol to a String.
    #
    # @param values [#collect] The values to append. Non-Symbol
    #   entries are passed through untouched.
    # @param is_valids [Object, nil] Forwarded as is to the values
    #   builder's `append_values`.
    # @return [Object] Whatever `append_array` returns.
    def append_values(values, is_valids=nil)
      values_builder = create_values_array_builder
      normalized = values.map do |item|
        Symbol === item ? item.to_s : item
      end
      values_builder.append_values(normalized, is_valids)
      append_array(values_builder.finish)
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class TableConcatenateOptions
    class << self
      # @api private
      #
      # Builds options from a Hash by assigning every entry through
      # the writer named after its key (`"#{key}="`).
      #
      # @param value [Object] The conversion candidate.
      # @return [Arrow::TableConcatenateOptions, nil] A new instance
      #   populated from `value` when it is a Hash; `nil` otherwise.
      def try_convert(value)
        case value
        when Hash
          options = new
          value.each do |k, v|
            # Assign the entry's own value, not the whole Hash.
            options.public_send("#{k}=", v)
          end
          options
        else
          nil
        end
      end
    end
  end
end
@@ -18,6 +18,125 @@
18
18
  module Arrow
19
19
  # TODO: Most of this code should be implemented in Apache Arrow C++.
20
20
  class TableFormatter
21
# @private
#
# Formats one column's name and sampled values so that the header
# and the cells of that column can be rendered with a common width.
class ColumnFormatter
  attr_reader :column
  attr_reader :head_values
  attr_reader :tail_values
  attr_reader :sample_values

  # @param column [#name, #data_type] The column to format.
  # @param head_values [::Array] Values taken from the start of the
  #   column.
  # @param tail_values [::Array] Values taken from the end of the
  #   column.
  def initialize(column, head_values, tail_values)
    @column = column
    @head_values = head_values
    @tail_values = tail_values
    # The widths are computed only from these sampled values.
    @sample_values = head_values + tail_values
    # Cache: struct field => width of its formatted values.
    @field_value_widths = {}
  end

  # @return [Object] The column's data type (memoized).
  def data_type
    @data_type ||= @column.data_type
  end

  # @return [String] The column's name (memoized).
  def name
    @name ||= @column.name
  end

  # @return [String] The column name right-aligned to the width
  #   computed from the data type and the sampled values (memoized).
  def aligned_name
    @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
  end

  FLOAT_N_DIGITS = 10
  FORMATTED_NULL = "(null)"

  # Formats a single cell value.
  #
  # @param value [Object] The value to format.
  # @param width [Integer] The minimum width of the result. It is
  #   not applied to `::Time` values.
  # @return [String] The formatted value: numbers and `nil` are
  #   right-aligned, Hashes and everything else are left-aligned.
  def format_value(value, width=0)
    case value
    when ::Time
      value.iso8601
    when Float
      "%*f" % [[width, FLOAT_N_DIGITS].max, value]
    when Integer
      "%*d" % [width, value]
    when Hash
      # Hash values are rendered field by field, in the order of the
      # fields of this column's data type.
      formatted_values = data_type.fields.collect do |field|
        field_name = field.name
        field_value_width = compute_field_value_width(field, @sample_values)
        formatted_name = format_value(field_name, 0)
        formatted_value = format_value(value[field_name], field_value_width)
        "#{formatted_name}: #{formatted_value}"
      end
      "%-*s" % [width, "{#{formatted_values.join(", ")}}"]
    when nil
      "%*s" % [width, FORMATTED_NULL]
    else
      "%-*s" % [width, value.to_s]
    end
  end

  private
  # Computes (and caches per field) the width needed for the values
  # of `field` found in `sample_values`.
  #
  # @param field [#name, #data_type] The struct field.
  # @param sample_values [::Array<Hash, nil>] The sampled struct
  #   values; `nil` entries are treated as empty Hashes.
  # @return [Integer] The width for the field's values.
  def compute_field_value_width(field, sample_values)
    unless @field_value_widths.key?(field)
      field_name = field.name
      field_sample_values = sample_values.collect do |v|
        (v || {})[field_name]
      end
      # Align an empty name: the resulting size is the value width.
      field_aligned_name = format_aligned_name("",
                                               field.data_type,
                                               field_sample_values)
      @field_value_widths[field] = field_aligned_name.size
    end
    @field_value_widths[field]
  end

  # Right-aligns `name` to the width required by values of
  # `data_type`.
  #
  # @param name [String] The name to align.
  # @param data_type [Object] The data type that decides the width.
  # @param sample_values [::Array] The sampled values used for
  #   integer and struct widths.
  # @return [String] The aligned name; `name` itself for data types
  #   without a dedicated width rule.
  def format_aligned_name(name, data_type, sample_values)
    case data_type
    when TimestampDataType
      "%*s" % [::Time.now.iso8601.size, name]
    when IntegerDataType
      have_null = false
      have_negative = false
      max_value = nil
      sample_values.each do |value|
        if value.nil?
          have_null = true
        else
          if max_value.nil?
            max_value = value.abs
          else
            max_value = [value.abs, max_value].max
          end
          have_negative = true if value.negative?
        end
      end
      if max_value.nil?
        width = 0
      else
        # Count digits on the decimal representation: Math.log10
        # goes through Float and over-counts for large integers.
        width = max_value.to_s.size
      end
      width += 1 if have_negative # Need "-"
      width = [width, FORMATTED_NULL.size].max if have_null
      "%*s" % [width, name]
    when FloatDataType, DoubleDataType
      "%*s" % [FLOAT_N_DIGITS, name]
    when StructDataType
      field_widths = data_type.fields.collect do |field|
        field_value_width = compute_field_value_width(field, sample_values)
        field.name.size + ": ".size + field_value_width
      end
      width = "{}".size + field_widths.sum
      if field_widths.size > 0
        width += (", ".size * (field_widths.size - 1))
      end
      "%*s" % [width, name]
    else
      name
    end
  end
end
139
+
21
140
  def initialize(table, options={})
22
141
  @table = table
23
142
  @options = options
@@ -25,38 +144,43 @@ module Arrow
25
144
 
26
145
  def format
27
146
  text = ""
28
- columns = @table.columns
29
- format_header(text, columns)
30
-
31
147
  n_rows = @table.n_rows
32
- return text if n_rows.zero?
33
-
34
148
  border = @options[:border] || 10
35
- n_digits = (Math.log10(n_rows) + 1).truncate
149
+
36
150
  head_limit = [border, n_rows].min
37
- head_column_values = columns.collect do |column|
38
- column.each.take(head_limit)
151
+
152
+ tail_start = [border, n_rows - border].max
153
+ tail_limit = n_rows - tail_start
154
+
155
+ column_formatters = @table.columns.collect do |column|
156
+ head_values = column.each.take(head_limit)
157
+ if tail_limit > 0
158
+ tail_values = column.reverse_each.take(tail_limit).reverse
159
+ else
160
+ tail_values = []
161
+ end
162
+ ColumnFormatter.new(column, head_values, tail_values)
39
163
  end
164
+
165
+ format_header(text, column_formatters)
166
+ return text if n_rows.zero?
167
+
168
+ n_digits = (Math.log10(n_rows) + 1).truncate
40
169
  format_rows(text,
41
- columns,
42
- head_column_values.transpose,
170
+ column_formatters,
171
+ column_formatters.collect(&:head_values).transpose,
43
172
  n_digits,
44
173
  0)
45
174
  return text if n_rows <= border
46
175
 
47
- tail_start = [border, n_rows - border].max
48
- tail_limit = n_rows - tail_start
49
- tail_column_values = columns.collect do |column|
50
- column.reverse_each.take(tail_limit).reverse
51
- end
52
176
 
53
177
  if head_limit != tail_start
54
178
  format_ellipsis(text)
55
179
  end
56
180
 
57
181
  format_rows(text,
58
- columns,
59
- tail_column_values.transpose,
182
+ column_formatters,
183
+ column_formatters.collect(&:tail_values).transpose,
60
184
  n_digits,
61
185
  tail_start)
62
186
 
@@ -22,12 +22,14 @@ module Arrow
22
22
  def format_header(text, columns)
23
23
  end
24
24
 
25
- def format_rows(text, columns, rows, n_digits, start_offset)
25
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
26
26
  rows.each_with_index do |row, nth_row|
27
27
  text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
28
28
  row.each_with_index do |column_value, nth_column|
29
- column = columns[nth_column]
30
- text << "#{column.name}: #{column_value}\n"
29
+ column_formatter = column_formatters[nth_column]
30
+ formatted_name = column_formatter.name
31
+ formatted_value = column_formatter.format_value(column_value)
32
+ text << "#{formatted_name}: #{formatted_value}\n"
31
33
  end
32
34
  end
33
35
  end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "uri"
19
+
18
20
  module Arrow
19
21
  class TableLoader
20
22
  class << self
@@ -31,6 +33,31 @@ module Arrow
31
33
  end
32
34
 
33
35
  def load
36
+ if @input.is_a?(URI)
37
+ custom_load_method = "load_from_uri"
38
+ elsif @input.is_a?(String) and ::File.directory?(@input)
39
+ custom_load_method = "load_from_directory"
40
+ else
41
+ custom_load_method = "load_from_file"
42
+ end
43
+ unless respond_to?(custom_load_method, true)
44
+ available_schemes = []
45
+ (methods(true) | private_methods(true)).each do |name|
46
+ match_data = /\Aload_from_/.match(name.to_s)
47
+ if match_data
48
+ available_schemes << match_data.post_match
49
+ end
50
+ end
51
+ message = "Arrow::Table load source must be one of ["
52
+ message << available_schemes.join(", ")
53
+ message << "]: #{@input.inspect}"
54
+ raise ArgumentError, message
55
+ end
56
+ __send__(custom_load_method)
57
+ end
58
+
59
+ private
60
+ def load_from_file
34
61
  format = @options[:format]
35
62
  custom_load_method = "load_as_#{format}"
36
63
  unless respond_to?(custom_load_method, true)
@@ -56,21 +83,24 @@ module Arrow
56
83
  end
57
84
  end
58
85
 
59
- private
60
86
  def fill_options
61
87
  if @options[:format] and @options.key?(:compression)
62
88
  return
63
89
  end
64
90
 
65
- if @input.is_a?(Buffer)
91
+ case @input
92
+ when Buffer
66
93
  info = {}
94
+ when URI
95
+ extension = PathExtension.new(@input.path)
96
+ info = extension.extract
67
97
  else
68
98
  extension = PathExtension.new(@input)
69
99
  info = extension.extract
70
100
  end
71
101
  format = info[:format]
72
102
  @options = @options.dup
73
- if format and respond_to?("load_as_#{format}", true)
103
+ if format
74
104
  @options[:format] ||= format.to_sym
75
105
  else
76
106
  @options[:format] ||= :arrow
@@ -183,5 +213,13 @@ module Arrow
183
213
  table.instance_variable_set(:@input, input)
184
214
  table
185
215
  end
216
+
217
# Reads the input as JSON through `JSONReader`.
#
# @return [Object] The table returned by the reader. The opened
#   input stream is stored in the table's `@input` instance
#   variable. NOTE(review): presumably this keeps the stream
#   referenced for as long as the table lives — confirm.
def load_as_json
  input = open_input_stream
  JSONReader.new(input).read.tap do |table|
    table.instance_variable_set(:@input, input)
  end
end
186
224
  end
187
225
  end
@@ -32,6 +32,29 @@ module Arrow
32
32
  end
33
33
 
34
34
  def save
35
+ if @output.is_a?(URI)
36
+ custom_save_method = "save_to_uri"
37
+ else
38
+ custom_save_method = "save_to_file"
39
+ end
40
+ unless respond_to?(custom_save_method, true)
41
+ available_schemes = []
42
+ (methods(true) | private_methods(true)).each do |name|
43
+ match_data = /\Asave_to_/.match(name.to_s)
44
+ if match_data
45
+ available_schemes << match_data.post_match
46
+ end
47
+ end
48
+ message = "Arrow::Table save source must be one of ["
49
+ message << available_schemes.join(", ")
50
+ message << "]: #{@output.scheme.inspect}"
51
+ raise ArgumentError, message
52
+ end
53
+ __send__(custom_save_method)
54
+ end
55
+
56
+ private
57
+ def save_to_file
35
58
  format = @options[:format]
36
59
  custom_save_method = "save_as_#{format}"
37
60
  unless respond_to?(custom_save_method, true)
@@ -57,21 +80,24 @@ module Arrow
57
80
  end
58
81
  end
59
82
 
60
- private
61
83
  def fill_options
62
84
  if @options[:format] and @options.key?(:compression)
63
85
  return
64
86
  end
65
87
 
66
- if @output.is_a?(Buffer)
88
+ case @output
89
+ when Buffer
67
90
  info = {}
91
+ when URI
92
+ extension = PathExtension.new(@output.path)
93
+ info = extension.extract
68
94
  else
69
95
  extension = PathExtension.new(@output)
70
96
  info = extension.extract
71
97
  end
72
98
  format = info[:format]
73
99
  @options = @options.dup
74
- if format and respond_to?("save_as_#{format}", true)
100
+ if format
75
101
  @options[:format] ||= format.to_sym
76
102
  else
77
103
  @options[:format] ||= :arrow
@@ -21,51 +21,27 @@ module Arrow
21
21
  # TODO: Most of this code should be implemented in Apache Arrow C++.
22
22
  class TableTableFormatter < TableFormatter
23
23
  private
24
- def format_header(text, columns)
25
- columns.each do |column|
24
+ def format_header(text, column_formatters)
25
+ column_formatters.each do |column_formatter|
26
26
  text << "\t"
27
- text << format_column_name(column)
27
+ text << column_formatter.aligned_name
28
28
  end
29
29
  text << "\n"
30
30
  end
31
31
 
32
- FLOAT_N_DIGITS = 10
33
- def format_column_name(column)
34
- case column.data_type
35
- when TimestampDataType
36
- "%*s" % [::Time.now.iso8601.size, column.name]
37
- when FloatDataType, DoubleDataType
38
- "%*s" % [FLOAT_N_DIGITS, column.name]
39
- else
40
- column.name
41
- end
42
- end
43
-
44
- def format_rows(text, columns, rows, n_digits, start_offset)
32
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
45
33
  rows.each_with_index do |row, nth_row|
46
34
  text << ("%*d" % [n_digits, start_offset + nth_row])
47
35
  row.each_with_index do |column_value, nth_column|
48
36
  text << "\t"
49
- column = columns[nth_column]
50
- text << format_column_value(column, column_value)
37
+ column_formatter = column_formatters[nth_column]
38
+ aligned_name = column_formatter.aligned_name
39
+ text << column_formatter.format_value(column_value, aligned_name.size)
51
40
  end
52
41
  text << "\n"
53
42
  end
54
43
  end
55
44
 
56
- def format_column_value(column, value)
57
- case value
58
- when ::Time
59
- value.iso8601
60
- when Float
61
- "%*f" % [[column.name.size, FLOAT_N_DIGITS].max, value]
62
- when Integer
63
- "%*d" % [column.name.size, value]
64
- else
65
- "%-*s" % [column.name.size, value.to_s]
66
- end
67
- end
68
-
69
45
  def format_ellipsis(text)
70
46
  text << "...\n"
71
47
  end
data/lib/arrow/table.rb CHANGED
@@ -195,8 +195,6 @@ module Arrow
195
195
  alias_method :size, :n_rows
196
196
  alias_method :length, :n_rows
197
197
 
198
- alias_method :[], :find_column
199
-
200
198
  alias_method :slice_raw, :slice
201
199
 
202
200
  # @overload slice(offset, length)
@@ -236,6 +234,12 @@ module Arrow
236
234
  # @return [Arrow::Table]
237
235
  # The sub `Arrow::Table` that covers only rows of the range of indices.
238
236
  #
237
+ # @overload slice(conditions)
238
+ #
239
+ # @param conditions [Hash] The conditions to select records.
240
+ # @return [Arrow::Table]
241
+ # The sub `Arrow::Table` that covers only rows matched by condition
242
+ #
239
243
  # @overload slice
240
244
  #
241
245
  # @yield [slicer] Gives slicer that constructs condition to select records.
@@ -263,12 +267,37 @@ module Arrow
263
267
  expected_n_args = nil
264
268
  case args.size
265
269
  when 1
266
- if args[0].is_a?(Integer)
270
+ case args[0]
271
+ when Integer
267
272
  index = args[0]
268
273
  index += n_rows if index < 0
269
274
  return nil if index < 0
270
275
  return nil if index >= n_rows
271
276
  return Record.new(self, index)
277
+ when Hash
278
+ condition_pairs = args[0]
279
+ slicer = Slicer.new(self)
280
+ conditions = []
281
+ condition_pairs.each do |key, value|
282
+ case value
283
+ when Range
284
+ # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
285
+ # https://issues.apache.org/jira/browse/ARROW-9843
286
+ unless value.begin.nil?
287
+ conditions << (slicer[key] >= value.begin)
288
+ end
289
+ unless value.end.nil?
290
+ if value.exclude_end?
291
+ conditions << (slicer[key] < value.end)
292
+ else
293
+ conditions << (slicer[key] <= value.end)
294
+ end
295
+ end
296
+ else
297
+ conditions << (slicer[key] == value)
298
+ end
299
+ end
300
+ slicers << conditions.inject(:&)
272
301
  else
273
302
  slicers << args[0]
274
303
  end
@@ -397,41 +426,6 @@ module Arrow
397
426
  remove_column_raw(index)
398
427
  end
399
428
 
400
- # TODO
401
- #
402
- # @return [Arrow::Table]
403
- def select_columns(*selectors, &block)
404
- if selectors.empty?
405
- return to_enum(__method__) unless block_given?
406
- selected_columns = columns.select(&block)
407
- else
408
- selected_columns = []
409
- selectors.each do |selector|
410
- case selector
411
- when String, Symbol
412
- column = find_column(selector)
413
- if column.nil?
414
- message = "unknown column: #{selector.inspect}: #{inspect}"
415
- raise KeyError.new(message)
416
- end
417
- selected_columns << column
418
- when Range
419
- selected_columns.concat(columns[selector])
420
- else
421
- column = columns[selector]
422
- if column.nil?
423
- message = "out of index (0..#{n_columns - 1}): " +
424
- "#{selector.inspect}: #{inspect}"
425
- raise IndexError.new(message)
426
- end
427
- selected_columns << column
428
- end
429
- end
430
- selected_columns = selected_columns.select(&block) if block_given?
431
- end
432
- self.class.new(selected_columns)
433
- end
434
-
435
429
  # Experimental
436
430
  def group(*keys)
437
431
  Group.new(self, keys)
@@ -442,8 +436,8 @@ module Arrow
442
436
  RollingWindow.new(self, size)
443
437
  end
444
438
 
445
- def save(path, options={})
446
- saver = TableSaver.new(self, path, options)
439
+ def save(output, options={})
440
+ saver = TableSaver.new(self, output, options)
447
441
  saver.save
448
442
  end
449
443
 
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol