red-arrow 3.0.0 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/README.md +23 -0
  3. data/ext/arrow/arrow.cpp +3 -0
  4. data/ext/arrow/converters.cpp +5 -0
  5. data/ext/arrow/converters.hpp +126 -0
  6. data/ext/arrow/extconf.rb +13 -0
  7. data/ext/arrow/memory-view.cpp +311 -0
  8. data/ext/arrow/memory-view.hpp +26 -0
  9. data/ext/arrow/raw-records.cpp +1 -0
  10. data/ext/arrow/values.cpp +1 -0
  11. data/lib/arrow/aggregate-node-options.rb +35 -0
  12. data/lib/arrow/aggregation.rb +46 -0
  13. data/lib/arrow/array-builder.rb +5 -0
  14. data/lib/arrow/array.rb +130 -0
  15. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  16. data/lib/arrow/buffer.rb +10 -6
  17. data/lib/arrow/column-containable.rb +100 -1
  18. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  19. data/lib/arrow/data-type.rb +14 -5
  20. data/lib/arrow/datum.rb +100 -0
  21. data/lib/arrow/dense-union-data-type.rb +2 -2
  22. data/lib/arrow/dictionary-data-type.rb +2 -2
  23. data/lib/arrow/equal-options.rb +38 -0
  24. data/lib/arrow/expression.rb +48 -0
  25. data/lib/arrow/file-system.rb +34 -0
  26. data/lib/arrow/group.rb +116 -124
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/map-array-builder.rb +109 -0
  29. data/lib/arrow/map-array.rb +26 -0
  30. data/lib/arrow/map-data-type.rb +89 -0
  31. data/lib/arrow/path-extension.rb +1 -1
  32. data/lib/arrow/record-batch-reader.rb +41 -0
  33. data/lib/arrow/record-batch.rb +0 -2
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +193 -0
  37. data/lib/arrow/sort-options.rb +109 -0
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/sparse-union-data-type.rb +2 -2
  40. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  41. data/lib/arrow/symbol-values-appendable.rb +34 -0
  42. data/lib/arrow/table-concatenate-options.rb +36 -0
  43. data/lib/arrow/table-formatter.rb +141 -17
  44. data/lib/arrow/table-list-formatter.rb +5 -3
  45. data/lib/arrow/table-loader.rb +41 -3
  46. data/lib/arrow/table-saver.rb +29 -3
  47. data/lib/arrow/table-table-formatter.rb +7 -31
  48. data/lib/arrow/table.rb +34 -40
  49. data/lib/arrow/time32-data-type.rb +2 -2
  50. data/lib/arrow/time64-data-type.rb +2 -2
  51. data/lib/arrow/timestamp-data-type.rb +2 -2
  52. data/lib/arrow/version.rb +1 -1
  53. data/red-arrow.gemspec +2 -1
  54. data/test/helper.rb +1 -0
  55. data/test/raw-records/test-dense-union-array.rb +14 -0
  56. data/test/raw-records/test-list-array.rb +19 -0
  57. data/test/raw-records/test-map-array.rb +441 -0
  58. data/test/raw-records/test-sparse-union-array.rb +14 -0
  59. data/test/raw-records/test-struct-array.rb +15 -0
  60. data/test/test-array-builder.rb +7 -0
  61. data/test/test-array.rb +154 -0
  62. data/test/test-binary-dictionary-array-builder.rb +103 -0
  63. data/test/test-boolean-scalar.rb +26 -0
  64. data/test/test-csv-loader.rb +8 -8
  65. data/test/test-decimal128-data-type.rb +2 -2
  66. data/test/test-expression.rb +40 -0
  67. data/test/test-float-scalar.rb +46 -0
  68. data/test/test-function.rb +176 -0
  69. data/test/test-group.rb +75 -51
  70. data/test/test-map-array-builder.rb +110 -0
  71. data/test/test-map-array.rb +33 -0
  72. data/test/test-map-data-type.rb +36 -0
  73. data/test/test-memory-view.rb +434 -0
  74. data/test/test-orc.rb +19 -23
  75. data/test/test-record-batch-reader.rb +46 -0
  76. data/test/test-record-batch.rb +42 -0
  77. data/test/test-slicer.rb +166 -167
  78. data/test/test-sort-indices.rb +40 -0
  79. data/test/test-sort-key.rb +81 -0
  80. data/test/test-sort-options.rb +58 -0
  81. data/test/test-string-dictionary-array-builder.rb +103 -0
  82. data/test/test-table.rb +190 -53
  83. data/test/values/test-dense-union-array.rb +14 -0
  84. data/test/values/test-list-array.rb +17 -0
  85. data/test/values/test-map-array.rb +433 -0
  86. data/test/values/test-sparse-union-array.rb +14 -0
  87. data/test/values/test-struct-array.rb +15 -0
  88. metadata +73 -6
@@ -0,0 +1,32 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class SourceNodeOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when RecordBatchReader, RecordBatch, Table
25
+ new(value)
26
+ else
27
+ nil
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -33,7 +33,7 @@ module Arrow
33
33
  # @param type_codes [::Array<Integer>] The IDs that indicates
34
34
  # corresponding fields.
35
35
  #
36
- # @example Create a sparse union data type for {2: visible, 9: count}
36
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
37
37
  # fields = [
38
38
  # Arrow::Field.new("visible", :boolean),
39
39
  # {
@@ -57,7 +57,7 @@ module Arrow
57
57
  # @option description [::Array<Integer>] :type_codes The IDs
58
58
  # that indicates corresponding fields.
59
59
  #
60
- # @example Create a sparse union data type for {2: visible, 9: count}
60
+ # @example Create a sparse union data type for `{2: visible, 9: count}`
61
61
  # fields = [
62
62
  # Arrow::Field.new("visible", :boolean),
63
63
  # {
@@ -0,0 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class StringDictionaryArrayBuilder
20
+ include SymbolValuesAppendable
21
+
22
+ private
23
+ def create_values_array_builder
24
+ StringArrayBuilder.new
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,34 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module SymbolValuesAppendable
20
+ def append_values(values, is_valids=nil)
21
+ builder = create_values_array_builder
22
+ values = values.collect do |value|
23
+ case value
24
+ when Symbol
25
+ value.to_s
26
+ else
27
+ value
28
+ end
29
+ end
30
+ builder.append_values(values, is_valids)
31
+ append_array(builder.finish)
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class TableConcatenateOptions
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Hash
25
+ options = new
26
+ value.each do |k, v|
27
+ options.public_send("#{k}=", value)
28
+ end
29
+ options
30
+ else
31
+ nil
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -18,6 +18,125 @@
18
18
  module Arrow
19
19
  # TODO: Almost codes should be implemented in Apache Arrow C++.
20
20
  class TableFormatter
21
+ # @private
22
+ class ColumnFormatter
23
+ attr_reader :column
24
+ attr_reader :head_values
25
+ attr_reader :tail_values
26
+ attr_reader :sample_values
27
+ def initialize(column, head_values, tail_values)
28
+ @column = column
29
+ @head_values = head_values
30
+ @tail_values = tail_values
31
+ @sample_values = head_values + tail_values
32
+ @field_value_widths = {}
33
+ end
34
+
35
+ def data_type
36
+ @data_type ||= @column.data_type
37
+ end
38
+
39
+ def name
40
+ @name ||= @column.name
41
+ end
42
+
43
+ def aligned_name
44
+ @aligned_name ||= format_aligned_name(name, data_type, @sample_values)
45
+ end
46
+
47
+ FLOAT_N_DIGITS = 10
48
+ FORMATTED_NULL = "(null)"
49
+
50
+ def format_value(value, width=0)
51
+ case value
52
+ when ::Time
53
+ value.iso8601
54
+ when Float
55
+ "%*f" % [[width, FLOAT_N_DIGITS].max, value]
56
+ when Integer
57
+ "%*d" % [width, value]
58
+ when Hash
59
+ formatted_values = data_type.fields.collect do |field|
60
+ field_name = field.name
61
+ field_value_width = compute_field_value_width(field, @sample_values)
62
+ formatted_name = format_value(field_name, 0)
63
+ formatted_value = format_value(value[field_name], field_value_width)
64
+ "#{formatted_name}: #{formatted_value}"
65
+ end
66
+ formatted = "{"
67
+ formatted << formatted_values.join(", ")
68
+ formatted << "}"
69
+ "%-*s" % [width, formatted]
70
+ when nil
71
+ "%*s" % [width, FORMATTED_NULL]
72
+ else
73
+ "%-*s" % [width, value.to_s]
74
+ end
75
+ end
76
+
77
+ private
78
+ def compute_field_value_width(field, sample_values)
79
+ unless @field_value_widths.key?(field)
80
+ field_name = field.name
81
+ field_sample_values = sample_values.collect do |v|
82
+ (v || {})[field_name]
83
+ end
84
+ field_aligned_name = format_aligned_name("",
85
+ field.data_type,
86
+ field_sample_values)
87
+ @field_value_widths[field] = field_aligned_name.size
88
+ end
89
+ @field_value_widths[field]
90
+ end
91
+
92
+ def format_aligned_name(name, data_type, sample_values)
93
+ case data_type
94
+ when TimestampDataType
95
+ "%*s" % [::Time.now.iso8601.size, name]
96
+ when IntegerDataType
97
+ have_null = false
98
+ have_negative = false
99
+ max_value = nil
100
+ sample_values.each do |value|
101
+ if value.nil?
102
+ have_null = true
103
+ else
104
+ if max_value.nil?
105
+ max_value = value.abs
106
+ else
107
+ max_value = [value.abs, max_value].max
108
+ end
109
+ have_negative = true if value.negative?
110
+ end
111
+ end
112
+ if max_value.nil?
113
+ width = 0
114
+ elsif max_value.zero?
115
+ width = 1
116
+ else
117
+ width = (Math.log10(max_value) + 1).truncate
118
+ end
119
+ width += 1 if have_negative # Need "-"
120
+ width = [width, FORMATTED_NULL.size].max if have_null
121
+ "%*s" % [width, name]
122
+ when FloatDataType, DoubleDataType
123
+ "%*s" % [FLOAT_N_DIGITS, name]
124
+ when StructDataType
125
+ field_widths = data_type.fields.collect do |field|
126
+ field_value_width = compute_field_value_width(field, sample_values)
127
+ field.name.size + ": ".size + field_value_width
128
+ end
129
+ width = "{}".size + field_widths.sum
130
+ if field_widths.size > 0
131
+ width += (", ".size * (field_widths.size - 1))
132
+ end
133
+ "%*s" % [width, name]
134
+ else
135
+ name
136
+ end
137
+ end
138
+ end
139
+
21
140
  def initialize(table, options={})
22
141
  @table = table
23
142
  @options = options
@@ -25,38 +144,43 @@ module Arrow
25
144
 
26
145
  def format
27
146
  text = ""
28
- columns = @table.columns
29
- format_header(text, columns)
30
-
31
147
  n_rows = @table.n_rows
32
- return text if n_rows.zero?
33
-
34
148
  border = @options[:border] || 10
35
- n_digits = (Math.log10(n_rows) + 1).truncate
149
+
36
150
  head_limit = [border, n_rows].min
37
- head_column_values = columns.collect do |column|
38
- column.each.take(head_limit)
151
+
152
+ tail_start = [border, n_rows - border].max
153
+ tail_limit = n_rows - tail_start
154
+
155
+ column_formatters = @table.columns.collect do |column|
156
+ head_values = column.each.take(head_limit)
157
+ if tail_limit > 0
158
+ tail_values = column.reverse_each.take(tail_limit).reverse
159
+ else
160
+ tail_values = []
161
+ end
162
+ ColumnFormatter.new(column, head_values, tail_values)
39
163
  end
164
+
165
+ format_header(text, column_formatters)
166
+ return text if n_rows.zero?
167
+
168
+ n_digits = (Math.log10(n_rows) + 1).truncate
40
169
  format_rows(text,
41
- columns,
42
- head_column_values.transpose,
170
+ column_formatters,
171
+ column_formatters.collect(&:head_values).transpose,
43
172
  n_digits,
44
173
  0)
45
174
  return text if n_rows <= border
46
175
 
47
- tail_start = [border, n_rows - border].max
48
- tail_limit = n_rows - tail_start
49
- tail_column_values = columns.collect do |column|
50
- column.reverse_each.take(tail_limit).reverse
51
- end
52
176
 
53
177
  if head_limit != tail_start
54
178
  format_ellipsis(text)
55
179
  end
56
180
 
57
181
  format_rows(text,
58
- columns,
59
- tail_column_values.transpose,
182
+ column_formatters,
183
+ column_formatters.collect(&:tail_values).transpose,
60
184
  n_digits,
61
185
  tail_start)
62
186
 
@@ -22,12 +22,14 @@ module Arrow
22
22
  def format_header(text, columns)
23
23
  end
24
24
 
25
- def format_rows(text, columns, rows, n_digits, start_offset)
25
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
26
26
  rows.each_with_index do |row, nth_row|
27
27
  text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n")
28
28
  row.each_with_index do |column_value, nth_column|
29
- column = columns[nth_column]
30
- text << "#{column.name}: #{column_value}\n"
29
+ column_formatter = column_formatters[nth_column]
30
+ formatted_name = column_formatter.name
31
+ formatted_value = column_formatter.format_value(column_value)
32
+ text << "#{formatted_name}: #{formatted_value}\n"
31
33
  end
32
34
  end
33
35
  end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "uri"
19
+
18
20
  module Arrow
19
21
  class TableLoader
20
22
  class << self
@@ -31,6 +33,31 @@ module Arrow
31
33
  end
32
34
 
33
35
  def load
36
+ if @input.is_a?(URI)
37
+ custom_load_method = "load_from_uri"
38
+ elsif @input.is_a?(String) and ::File.directory?(@input)
39
+ custom_load_method = "load_from_directory"
40
+ else
41
+ custom_load_method = "load_from_file"
42
+ end
43
+ unless respond_to?(custom_load_method, true)
44
+ available_schemes = []
45
+ (methods(true) | private_methods(true)).each do |name|
46
+ match_data = /\Aload_from_/.match(name.to_s)
47
+ if match_data
48
+ available_schemes << match_data.post_match
49
+ end
50
+ end
51
+ message = "Arrow::Table load source must be one of ["
52
+ message << available_schemes.join(", ")
53
+ message << "]: #{@input.inspect}"
54
+ raise ArgumentError, message
55
+ end
56
+ __send__(custom_load_method)
57
+ end
58
+
59
+ private
60
+ def load_from_file
34
61
  format = @options[:format]
35
62
  custom_load_method = "load_as_#{format}"
36
63
  unless respond_to?(custom_load_method, true)
@@ -56,21 +83,24 @@ module Arrow
56
83
  end
57
84
  end
58
85
 
59
- private
60
86
  def fill_options
61
87
  if @options[:format] and @options.key?(:compression)
62
88
  return
63
89
  end
64
90
 
65
- if @input.is_a?(Buffer)
91
+ case @input
92
+ when Buffer
66
93
  info = {}
94
+ when URI
95
+ extension = PathExtension.new(@input.path)
96
+ info = extension.extract
67
97
  else
68
98
  extension = PathExtension.new(@input)
69
99
  info = extension.extract
70
100
  end
71
101
  format = info[:format]
72
102
  @options = @options.dup
73
- if format and respond_to?("load_as_#{format}", true)
103
+ if format
74
104
  @options[:format] ||= format.to_sym
75
105
  else
76
106
  @options[:format] ||= :arrow
@@ -183,5 +213,13 @@ module Arrow
183
213
  table.instance_variable_set(:@input, input)
184
214
  table
185
215
  end
216
+
217
+ def load_as_json
218
+ input = open_input_stream
219
+ reader = JSONReader.new(input)
220
+ table = reader.read
221
+ table.instance_variable_set(:@input, input)
222
+ table
223
+ end
186
224
  end
187
225
  end
@@ -32,6 +32,29 @@ module Arrow
32
32
  end
33
33
 
34
34
  def save
35
+ if @output.is_a?(URI)
36
+ custom_save_method = "save_to_uri"
37
+ else
38
+ custom_save_method = "save_to_file"
39
+ end
40
+ unless respond_to?(custom_save_method, true)
41
+ available_schemes = []
42
+ (methods(true) | private_methods(true)).each do |name|
43
+ match_data = /\Asave_to_/.match(name.to_s)
44
+ if match_data
45
+ available_schemes << match_data.post_match
46
+ end
47
+ end
48
+ message = "Arrow::Table save source must be one of ["
49
+ message << available_schemes.join(", ")
50
+ message << "]: #{@output.scheme.inspect}"
51
+ raise ArgumentError, message
52
+ end
53
+ __send__(custom_save_method)
54
+ end
55
+
56
+ private
57
+ def save_to_file
35
58
  format = @options[:format]
36
59
  custom_save_method = "save_as_#{format}"
37
60
  unless respond_to?(custom_save_method, true)
@@ -57,21 +80,24 @@ module Arrow
57
80
  end
58
81
  end
59
82
 
60
- private
61
83
  def fill_options
62
84
  if @options[:format] and @options.key?(:compression)
63
85
  return
64
86
  end
65
87
 
66
- if @output.is_a?(Buffer)
88
+ case @output
89
+ when Buffer
67
90
  info = {}
91
+ when URI
92
+ extension = PathExtension.new(@output.path)
93
+ info = extension.extract
68
94
  else
69
95
  extension = PathExtension.new(@output)
70
96
  info = extension.extract
71
97
  end
72
98
  format = info[:format]
73
99
  @options = @options.dup
74
- if format and respond_to?("save_as_#{format}", true)
100
+ if format
75
101
  @options[:format] ||= format.to_sym
76
102
  else
77
103
  @options[:format] ||= :arrow
@@ -21,51 +21,27 @@ module Arrow
21
21
  # TODO: Almost codes should be implemented in Apache Arrow C++.
22
22
  class TableTableFormatter < TableFormatter
23
23
  private
24
- def format_header(text, columns)
25
- columns.each do |column|
24
+ def format_header(text, column_formatters)
25
+ column_formatters.each do |column_formatter|
26
26
  text << "\t"
27
- text << format_column_name(column)
27
+ text << column_formatter.aligned_name
28
28
  end
29
29
  text << "\n"
30
30
  end
31
31
 
32
- FLOAT_N_DIGITS = 10
33
- def format_column_name(column)
34
- case column.data_type
35
- when TimestampDataType
36
- "%*s" % [::Time.now.iso8601.size, column.name]
37
- when FloatDataType, DoubleDataType
38
- "%*s" % [FLOAT_N_DIGITS, column.name]
39
- else
40
- column.name
41
- end
42
- end
43
-
44
- def format_rows(text, columns, rows, n_digits, start_offset)
32
+ def format_rows(text, column_formatters, rows, n_digits, start_offset)
45
33
  rows.each_with_index do |row, nth_row|
46
34
  text << ("%*d" % [n_digits, start_offset + nth_row])
47
35
  row.each_with_index do |column_value, nth_column|
48
36
  text << "\t"
49
- column = columns[nth_column]
50
- text << format_column_value(column, column_value)
37
+ column_formatter = column_formatters[nth_column]
38
+ aligned_name = column_formatter.aligned_name
39
+ text << column_formatter.format_value(column_value, aligned_name.size)
51
40
  end
52
41
  text << "\n"
53
42
  end
54
43
  end
55
44
 
56
- def format_column_value(column, value)
57
- case value
58
- when ::Time
59
- value.iso8601
60
- when Float
61
- "%*f" % [[column.name.size, FLOAT_N_DIGITS].max, value]
62
- when Integer
63
- "%*d" % [column.name.size, value]
64
- else
65
- "%-*s" % [column.name.size, value.to_s]
66
- end
67
- end
68
-
69
45
  def format_ellipsis(text)
70
46
  text << "...\n"
71
47
  end
data/lib/arrow/table.rb CHANGED
@@ -195,8 +195,6 @@ module Arrow
195
195
  alias_method :size, :n_rows
196
196
  alias_method :length, :n_rows
197
197
 
198
- alias_method :[], :find_column
199
-
200
198
  alias_method :slice_raw, :slice
201
199
 
202
200
  # @overload slice(offset, length)
@@ -236,6 +234,12 @@ module Arrow
236
234
  # @return [Arrow::Table]
237
235
  # The sub `Arrow::Table` that covers only rows of the range of indices.
238
236
  #
237
+ # @overload slice(conditions)
238
+ #
239
+ # @param conditions [Hash] The conditions to select records.
240
+ # @return [Arrow::Table]
241
+ # The sub `Arrow::Table` that covers only rows matched by condition
242
+ #
239
243
  # @overload slice
240
244
  #
241
245
  # @yield [slicer] Gives slicer that constructs condition to select records.
@@ -263,12 +267,37 @@ module Arrow
263
267
  expected_n_args = nil
264
268
  case args.size
265
269
  when 1
266
- if args[0].is_a?(Integer)
270
+ case args[0]
271
+ when Integer
267
272
  index = args[0]
268
273
  index += n_rows if index < 0
269
274
  return nil if index < 0
270
275
  return nil if index >= n_rows
271
276
  return Record.new(self, index)
277
+ when Hash
278
+ condition_pairs = args[0]
279
+ slicer = Slicer.new(self)
280
+ conditions = []
281
+ condition_pairs.each do |key, value|
282
+ case value
283
+ when Range
284
+ # TODO: Optimize "begin <= key <= end" case by missing "between" kernel
285
+ # https://issues.apache.org/jira/browse/ARROW-9843
286
+ unless value.begin.nil?
287
+ conditions << (slicer[key] >= value.begin)
288
+ end
289
+ unless value.end.nil?
290
+ if value.exclude_end?
291
+ conditions << (slicer[key] < value.end)
292
+ else
293
+ conditions << (slicer[key] <= value.end)
294
+ end
295
+ end
296
+ else
297
+ conditions << (slicer[key] == value)
298
+ end
299
+ end
300
+ slicers << conditions.inject(:&)
272
301
  else
273
302
  slicers << args[0]
274
303
  end
@@ -397,41 +426,6 @@ module Arrow
397
426
  remove_column_raw(index)
398
427
  end
399
428
 
400
- # TODO
401
- #
402
- # @return [Arrow::Table]
403
- def select_columns(*selectors, &block)
404
- if selectors.empty?
405
- return to_enum(__method__) unless block_given?
406
- selected_columns = columns.select(&block)
407
- else
408
- selected_columns = []
409
- selectors.each do |selector|
410
- case selector
411
- when String, Symbol
412
- column = find_column(selector)
413
- if column.nil?
414
- message = "unknown column: #{selector.inspect}: #{inspect}"
415
- raise KeyError.new(message)
416
- end
417
- selected_columns << column
418
- when Range
419
- selected_columns.concat(columns[selector])
420
- else
421
- column = columns[selector]
422
- if column.nil?
423
- message = "out of index (0..#{n_columns - 1}): " +
424
- "#{selector.inspect}: #{inspect}"
425
- raise IndexError.new(message)
426
- end
427
- selected_columns << column
428
- end
429
- end
430
- selected_columns = selected_columns.select(&block) if block_given?
431
- end
432
- self.class.new(selected_columns)
433
- end
434
-
435
429
  # Experimental
436
430
  def group(*keys)
437
431
  Group.new(self, keys)
@@ -442,8 +436,8 @@ module Arrow
442
436
  RollingWindow.new(self, size)
443
437
  end
444
438
 
445
- def save(path, options={})
446
- saver = TableSaver.new(self, path, options)
439
+ def save(output, options={})
440
+ saver = TableSaver.new(self, output, options)
447
441
  saver.save
448
442
  end
449
443
 
@@ -29,7 +29,7 @@ module Arrow
29
29
  #
30
30
  # The unit must be second or millisecond.
31
31
  #
32
- # @example Create a time32 data type with {Arrow::TimeUnit}
32
+ # @example Create a time32 data type with Arrow::TimeUnit
33
33
  # Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI)
34
34
  #
35
35
  # @example Create a time32 data type with Symbol
@@ -45,7 +45,7 @@ module Arrow
45
45
  #
46
46
  # The unit must be second or millisecond.
47
47
  #
48
- # @example Create a time32 data type with {Arrow::TimeUnit}
48
+ # @example Create a time32 data type with Arrow::TimeUnit
49
49
  # Arrow::Time32DataType.new(unit: Arrow::TimeUnit::MILLI)
50
50
  #
51
51
  # @example Create a time32 data type with Symbol