red-arrow 4.0.1 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +10 -0
  3. data/README.md +23 -0
  4. data/ext/arrow/arrow.cpp +3 -0
  5. data/ext/arrow/converters.cpp +5 -0
  6. data/ext/arrow/converters.hpp +126 -0
  7. data/ext/arrow/extconf.rb +13 -0
  8. data/ext/arrow/memory-view.cpp +311 -0
  9. data/ext/arrow/memory-view.hpp +26 -0
  10. data/ext/arrow/raw-records.cpp +1 -0
  11. data/ext/arrow/values.cpp +1 -0
  12. data/lib/arrow/aggregate-node-options.rb +35 -0
  13. data/lib/arrow/aggregation.rb +46 -0
  14. data/lib/arrow/array-builder.rb +5 -0
  15. data/lib/arrow/array.rb +12 -0
  16. data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
  17. data/lib/arrow/buffer.rb +10 -6
  18. data/lib/arrow/column-containable.rb +100 -1
  19. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  20. data/lib/arrow/datum.rb +102 -0
  21. data/lib/arrow/equal-options.rb +38 -0
  22. data/lib/arrow/expression.rb +48 -0
  23. data/lib/arrow/file-system.rb +34 -0
  24. data/lib/arrow/function.rb +52 -0
  25. data/lib/arrow/group.rb +116 -124
  26. data/lib/arrow/loader.rb +58 -0
  27. data/lib/arrow/map-array-builder.rb +109 -0
  28. data/lib/arrow/map-array.rb +26 -0
  29. data/lib/arrow/map-data-type.rb +89 -0
  30. data/lib/arrow/path-extension.rb +1 -1
  31. data/lib/arrow/record-batch-reader.rb +41 -0
  32. data/lib/arrow/record-batch.rb +0 -2
  33. data/lib/arrow/s3-global-options.rb +38 -0
  34. data/lib/arrow/scalar.rb +32 -0
  35. data/lib/arrow/slicer.rb +44 -143
  36. data/lib/arrow/sort-key.rb +61 -55
  37. data/lib/arrow/sort-options.rb +8 -8
  38. data/lib/arrow/source-node-options.rb +32 -0
  39. data/lib/arrow/string-dictionary-array-builder.rb +27 -0
  40. data/lib/arrow/symbol-values-appendable.rb +34 -0
  41. data/lib/arrow/table-concatenate-options.rb +36 -0
  42. data/lib/arrow/table-formatter.rb +141 -17
  43. data/lib/arrow/table-list-formatter.rb +5 -3
  44. data/lib/arrow/table-loader.rb +119 -44
  45. data/lib/arrow/table-saver.rb +36 -5
  46. data/lib/arrow/table-table-formatter.rb +7 -31
  47. data/lib/arrow/table.rb +112 -40
  48. data/lib/arrow/version.rb +1 -1
  49. data/red-arrow.gemspec +1 -9
  50. data/test/helper.rb +3 -0
  51. data/test/raw-records/test-dense-union-array.rb +14 -0
  52. data/test/raw-records/test-list-array.rb +19 -0
  53. data/test/raw-records/test-map-array.rb +441 -0
  54. data/test/raw-records/test-sparse-union-array.rb +14 -0
  55. data/test/raw-records/test-struct-array.rb +15 -0
  56. data/test/test-array-builder.rb +7 -0
  57. data/test/test-array.rb +34 -0
  58. data/test/test-binary-dictionary-array-builder.rb +103 -0
  59. data/test/test-boolean-scalar.rb +26 -0
  60. data/test/test-csv-loader.rb +8 -8
  61. data/test/test-expression.rb +40 -0
  62. data/test/test-float-scalar.rb +46 -0
  63. data/test/test-function.rb +210 -0
  64. data/test/test-group.rb +75 -51
  65. data/test/test-map-array-builder.rb +110 -0
  66. data/test/test-map-array.rb +33 -0
  67. data/test/test-map-data-type.rb +36 -0
  68. data/test/test-memory-view.rb +434 -0
  69. data/test/test-record-batch-reader.rb +46 -0
  70. data/test/test-record-batch.rb +42 -0
  71. data/test/test-slicer.rb +166 -167
  72. data/test/test-string-dictionary-array-builder.rb +103 -0
  73. data/test/test-table.rb +376 -56
  74. data/test/values/test-dense-union-array.rb +14 -0
  75. data/test/values/test-list-array.rb +17 -0
  76. data/test/values/test-map-array.rb +433 -0
  77. data/test/values/test-sparse-union-array.rb +14 -0
  78. data/test/values/test-struct-array.rb +15 -0
  79. metadata +117 -168
data/lib/arrow/group.rb CHANGED
@@ -16,157 +16,149 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- # Experimental
20
- #
21
- # TODO: Almost codes should be implemented in Apache Arrow C++.
22
19
  class Group
23
20
  def initialize(table, keys)
24
21
  @table = table
25
22
  @keys = keys
26
23
  end
27
24
 
28
- def count
29
- key_names = @keys.collect(&:to_s)
30
- target_columns = @table.columns.reject do |column|
31
- key_names.include?(column.name)
32
- end
33
- aggregate(target_columns) do |column, indexes|
34
- n = 0
35
- indexes.each do |index|
36
- n += 1 unless column.null?(index)
37
- end
38
- n
39
- end
25
+ def count(*target_names)
26
+ aggregate(*build_aggregations("hash_count", target_names))
40
27
  end
41
28
 
42
- def sum
43
- key_names = @keys.collect(&:to_s)
44
- target_columns = @table.columns.reject do |column|
45
- key_names.include?(column.name) or
46
- not column.data_type.is_a?(NumericDataType)
47
- end
48
- aggregate(target_columns) do |column, indexes|
49
- n = 0
50
- indexes.each do |index|
51
- value = column[index]
52
- n += value unless value.nil?
53
- end
54
- n
55
- end
29
+ def sum(*target_names)
30
+ aggregate(*build_aggregations("hash_sum", target_names))
56
31
  end
57
32
 
58
- def average
59
- key_names = @keys.collect(&:to_s)
60
- target_columns = @table.columns.reject do |column|
61
- key_names.include?(column.name) or
62
- not column.data_type.is_a?(NumericDataType)
63
- end
64
- aggregate(target_columns) do |column, indexes|
65
- average = 0.0
66
- n = 0
67
- indexes.each do |index|
68
- value = column[index]
69
- unless value.nil?
70
- n += 1
71
- average += (value - average) / n
72
- end
73
- end
74
- average
75
- end
33
+ def product(*target_names)
34
+ aggregate(*build_aggregations("hash_product", target_names))
76
35
  end
77
36
 
78
- def min
79
- key_names = @keys.collect(&:to_s)
80
- target_columns = @table.columns.reject do |column|
81
- key_names.include?(column.name) or
82
- not column.data_type.is_a?(NumericDataType)
83
- end
84
- aggregate(target_columns) do |column, indexes|
85
- n = nil
86
- indexes.each do |index|
87
- value = column[index]
88
- next if value.nil?
89
- n ||= value
90
- n = value if value < n
91
- end
92
- n
93
- end
37
+ def mean(*target_names)
38
+ aggregate(*build_aggregations("hash_mean", target_names))
94
39
  end
95
40
 
96
- def max
97
- key_names = @keys.collect(&:to_s)
98
- target_columns = @table.columns.reject do |column|
99
- key_names.include?(column.name) or
100
- not column.data_type.is_a?(NumericDataType)
101
- end
102
- aggregate(target_columns) do |column, indexes|
103
- n = nil
104
- indexes.each do |index|
105
- value = column[index]
106
- next if value.nil?
107
- n ||= value
108
- n = value if value > n
109
- end
110
- n
111
- end
41
+ def min(*target_names)
42
+ aggregate(*build_aggregations("hash_min", target_names))
43
+ end
44
+
45
+ def max(*target_names)
46
+ aggregate(*build_aggregations("hash_max", target_names))
47
+ end
48
+
49
+ def stddev(*target_names)
50
+ aggregate(*build_aggregations("hash_stddev", target_names))
51
+ end
52
+
53
+ def variance(*target_names)
54
+ aggregate(*build_aggregations("hash_variance", target_names))
55
+ end
56
+
57
+ def aggregate(aggregation, *more_aggregations)
58
+ aggregations = [aggregation] + more_aggregations
59
+ normalized_aggregations = normalize_aggregations(aggregations)
60
+ plan = ExecutePlan.new
61
+ source_node = plan.build_source_node(@table)
62
+ aggregate_node =
63
+ plan.build_aggregate_node(source_node,
64
+ {
65
+ aggregations: normalized_aggregations,
66
+ keys: @keys
67
+ })
68
+ sink_node_options = SinkNodeOptions.new
69
+ plan.build_sink_node(aggregate_node, sink_node_options)
70
+ plan.validate
71
+ plan.start
72
+ plan.wait
73
+ reader = sink_node_options.get_reader(aggregate_node.output_schema)
74
+ reader.read_all
112
75
  end
113
76
 
114
77
  private
115
- def aggregate(target_columns)
116
- sort_values = @table.n_rows.times.collect do |i|
117
- key_values = @keys.collect do |key|
118
- @table[key][i]
78
+ def build_aggregations(function_name, target_names)
79
+ if target_names.empty?
80
+ [function_name]
81
+ else
82
+ target_names.collect do |name|
83
+ "#{function_name}(#{name})"
119
84
  end
120
- [key_values, i]
121
- end
122
- sorted = sort_values.sort_by do |key_values, i|
123
- key_values
124
85
  end
86
+ end
125
87
 
126
- grouped_keys = []
127
- aggregated_arrays_raw = []
128
- target_columns.size.times do
129
- aggregated_arrays_raw << []
130
- end
131
- indexes = []
132
- sorted.each do |key_values, i|
133
- if grouped_keys.empty?
134
- grouped_keys << key_values
135
- indexes.clear
136
- indexes << i
137
- else
138
- if key_values == grouped_keys.last
139
- indexes << i
140
- else
141
- grouped_keys << key_values
142
- target_columns.each_with_index do |column, j|
143
- aggregated_arrays_raw[j] << yield(column, indexes)
144
- end
145
- indexes.clear
146
- indexes << i
88
+ def normalize_aggregations(aggregations)
89
+ normalized_aggregations = []
90
+ aggregations.each do |aggregation|
91
+ case aggregation
92
+ when :all
93
+ all_functions = [
94
+ "hash_count",
95
+ "hash_sum",
96
+ "hash_product",
97
+ "hash_mean",
98
+ "hash_stddev",
99
+ "hash_variance",
100
+ # "hash_tdigest",
101
+ "hash_min",
102
+ "hash_max",
103
+ "hash_any",
104
+ "hash_all",
105
+ ]
106
+ normalized_aggregations.concat(normalize_aggregations(all_functions))
107
+ when /\A([a-zA-Z0-9_].+?)\((.+?)\)\z/
108
+ function = $1
109
+ input = $2.strip
110
+ normalized_aggregations << {function: function, input: input}
111
+ when "count", "hash_count"
112
+ function = aggregation
113
+ target_columns.each do |column|
114
+ normalized_aggregations << {function: function, input: column.name}
147
115
  end
116
+ when "any", "hash_any", "all", "hash_all"
117
+ function = aggregation
118
+ boolean_target_columns.each do |column|
119
+ normalized_aggregations << {function: function, input: column.name}
120
+ end
121
+ when String
122
+ function = aggregation
123
+ numeric_target_columns.each do |column|
124
+ normalized_aggregations << {function: function, input: column.name}
125
+ end
126
+ else
127
+ normalized_aggregations << aggregation
148
128
  end
149
129
  end
150
- target_columns.each_with_index do |column, j|
151
- aggregated_arrays_raw[j] << yield(column, indexes)
130
+ normalized_aggregations
131
+ end
132
+
133
+ def target_columns
134
+ @target_columns ||= find_target_columns
135
+ end
136
+
137
+ def find_target_columns
138
+ key_names = @keys.collect(&:to_s)
139
+ @table.columns.find_all do |column|
140
+ not key_names.include?(column.name)
152
141
  end
142
+ end
153
143
 
154
- grouped_key_arrays_raw = grouped_keys.transpose
155
- fields = []
156
- arrays = []
157
- @keys.each_with_index do |key, i|
158
- key_column = @table[key]
159
- key_column_array_raw = grouped_key_arrays_raw[i]
160
- key_column_array = key_column.data_type.build_array(key_column_array_raw)
161
- fields << key_column.field
162
- arrays << key_column_array
144
+ def boolean_target_columns
145
+ @boolean_target_columns ||= find_boolean_target_columns
146
+ end
147
+
148
+ def find_boolean_target_columns
149
+ target_columns.find_all do |column|
150
+ column.data_type.is_a?(BooleanDataType)
163
151
  end
164
- target_columns.each_with_index do |column, i|
165
- array = ArrayBuilder.build(aggregated_arrays_raw[i])
166
- arrays << array
167
- fields << Field.new(column.field.name, array.value_data_type)
152
+ end
153
+
154
+ def numeric_target_columns
155
+ @numeric_target_columns ||= find_numeric_target_columns
156
+ end
157
+
158
+ def find_numeric_target_columns
159
+ target_columns.find_all do |column|
160
+ column.data_type.is_a?(NumericDataType)
168
161
  end
169
- Table.new(fields, arrays)
170
162
  end
171
163
  end
172
164
  end
data/lib/arrow/loader.rb CHANGED
@@ -29,6 +29,8 @@ module Arrow
29
29
  def post_load(repository, namespace)
30
30
  require_libraries
31
31
  require_extension_library
32
+ gc_guard
33
+ self.class.start_callback_dispatch_thread
32
34
  end
33
35
 
34
36
  def require_libraries
@@ -37,10 +39,14 @@ module Arrow
37
39
  require "arrow/generic-filterable"
38
40
  require "arrow/generic-takeable"
39
41
  require "arrow/record-containable"
42
+ require "arrow/symbol-values-appendable"
40
43
 
44
+ require "arrow/aggregate-node-options"
45
+ require "arrow/aggregation"
41
46
  require "arrow/array"
42
47
  require "arrow/array-builder"
43
48
  require "arrow/bigdecimal-extension"
49
+ require "arrow/binary-dictionary-array-builder"
44
50
  require "arrow/buffer"
45
51
  require "arrow/chunked-array"
46
52
  require "arrow/column"
@@ -52,6 +58,7 @@ module Arrow
52
58
  require "arrow/date32-array-builder"
53
59
  require "arrow/date64-array"
54
60
  require "arrow/date64-array-builder"
61
+ require "arrow/datum"
55
62
  require "arrow/decimal128"
56
63
  require "arrow/decimal128-array"
57
64
  require "arrow/decimal128-array-builder"
@@ -63,13 +70,20 @@ module Arrow
63
70
  require "arrow/dense-union-data-type"
64
71
  require "arrow/dictionary-array"
65
72
  require "arrow/dictionary-data-type"
73
+ require "arrow/equal-options"
74
+ require "arrow/expression"
66
75
  require "arrow/field"
67
76
  require "arrow/file-output-stream"
77
+ require "arrow/file-system"
68
78
  require "arrow/fixed-size-binary-array"
69
79
  require "arrow/fixed-size-binary-array-builder"
80
+ require "arrow/function"
70
81
  require "arrow/group"
71
82
  require "arrow/list-array-builder"
72
83
  require "arrow/list-data-type"
84
+ require "arrow/map-array"
85
+ require "arrow/map-array-builder"
86
+ require "arrow/map-data-type"
73
87
  require "arrow/null-array"
74
88
  require "arrow/null-array-builder"
75
89
  require "arrow/path-extension"
@@ -78,17 +92,23 @@ module Arrow
78
92
  require "arrow/record-batch-builder"
79
93
  require "arrow/record-batch-file-reader"
80
94
  require "arrow/record-batch-iterator"
95
+ require "arrow/record-batch-reader"
81
96
  require "arrow/record-batch-stream-reader"
82
97
  require "arrow/rolling-window"
98
+ require "arrow/s3-global-options"
99
+ require "arrow/scalar"
83
100
  require "arrow/schema"
84
101
  require "arrow/slicer"
85
102
  require "arrow/sort-key"
86
103
  require "arrow/sort-options"
104
+ require "arrow/source-node-options"
87
105
  require "arrow/sparse-union-data-type"
106
+ require "arrow/string-dictionary-array-builder"
88
107
  require "arrow/struct-array"
89
108
  require "arrow/struct-array-builder"
90
109
  require "arrow/struct-data-type"
91
110
  require "arrow/table"
111
+ require "arrow/table-concatenate-options"
92
112
  require "arrow/table-formatter"
93
113
  require "arrow/table-list-formatter"
94
114
  require "arrow/table-table-formatter"
@@ -112,6 +132,27 @@ module Arrow
112
132
  require "arrow.so"
113
133
  end
114
134
 
135
+ def gc_guard
136
+ require "arrow/constructor-arguments-gc-guardable"
137
+
138
+ [
139
+ @base_module::BinaryScalar,
140
+ @base_module::Buffer,
141
+ @base_module::DenseUnionScalar,
142
+ @base_module::FixedSizeBinaryScalar,
143
+ @base_module::LargeBinaryScalar,
144
+ @base_module::LargeListScalar,
145
+ @base_module::LargeStringScalar,
146
+ @base_module::ListScalar,
147
+ @base_module::MapScalar,
148
+ @base_module::SparseUnionScalar,
149
+ @base_module::StringScalar,
150
+ @base_module::StructScalar,
151
+ ].each do |klass|
152
+ klass.prepend(ConstructorArgumentsGCGuardable)
153
+ end
154
+ end
155
+
115
156
  def load_object_info(info)
116
157
  super
117
158
 
@@ -164,9 +205,26 @@ module Arrow
164
205
  method_name = "dup"
165
206
  end
166
207
  super(info, klass, method_name)
208
+ when "Arrow::BooleanScalar"
209
+ case method_name
210
+ when "value?"
211
+ method_name = "value"
212
+ end
213
+ super(info, klass, method_name)
167
214
  else
168
215
  super
169
216
  end
170
217
  end
218
+
219
+ def prepare_function_info_lock_gvl(function_info, klass)
220
+ super
221
+ case klass.name
222
+ when "Arrow::RecordBatchFileReader"
223
+ case function_info.name
224
+ when "new"
225
+ function_info.lock_gvl_default = false
226
+ end
227
+ end
228
+ end
171
229
  end
172
230
  end
@@ -0,0 +1,109 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class MapArrayBuilder
20
+ class << self
21
+ def build(data_type, values)
22
+ builder = new(data_type)
23
+ builder.build(values)
24
+ end
25
+ end
26
+
27
+ alias_method :append_value_raw, :append_value
28
+
29
+ # @overload append_value
30
+ #
31
+ # Starts appending a map record. You need to append
32
+ # values of map by {#key_builder} and {#item_builder}.
33
+ #
34
+ # @overload append_value(value)
35
+ #
36
+ # Appends a map record including key and item values.
37
+ #
38
+ # @param value [nil, #each] The map record.
39
+ #
40
+ # If this is `nil`, the map record is null.
41
+ #
42
+ # If this is an `Object` that has `#each`, each value is a pair of key and item.
43
+ #
44
+ # @since 6.0.0
45
+ def append_value(*args)
46
+ n_args = args.size
47
+
48
+ case n_args
49
+ when 0
50
+ append_value_raw
51
+ when 1
52
+ value = args[0]
53
+ case value
54
+ when nil
55
+ append_null
56
+ else
57
+ unless value.respond_to?(:each)
58
+ message = "map value must be nil, Hash or Object that has #each: #{value.inspect}"
59
+ raise ArgumentError, message
60
+ end
61
+ append_value_raw
62
+ @key_builder ||= key_builder
63
+ @item_builder ||= item_builder
64
+ case value
65
+ when Hash
66
+ keys = value.keys
67
+ values = value.values
68
+ else
69
+ keys = []
70
+ values = []
71
+ value.each do |key, item|
72
+ keys << key
73
+ values << item
74
+ end
75
+ end
76
+ @key_builder.append(*keys)
77
+ @item_builder.append(*values)
78
+ end
79
+ else
80
+ message = "wrong number of arguments (given #{n_args}, expected 0..1)"
81
+ raise ArgumentError, message
82
+ end
83
+ end
84
+
85
+ alias_method :append_values_raw, :append_values
86
+
87
+ def append_values(values, is_valids=nil)
88
+ value = values[0]
89
+ case value
90
+ when Integer
91
+ append_values_raw(values, is_valids)
92
+ else
93
+ if is_valids
94
+ is_valids.each_with_index do |is_valid, i|
95
+ if is_valid
96
+ append_value(values[i])
97
+ else
98
+ append_null
99
+ end
100
+ end
101
+ else
102
+ values.each do |value|
103
+ append_value(value)
104
+ end
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class MapArray
20
+ def get_value(i)
21
+ super.each_with_object({}) do |item, result|
22
+ result[item["key"]] = item["value"]
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,89 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class MapDataType
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ # Creates a new {Arrow::MapDataType}.
24
+ #
25
+ # @overload initialize(key, item)
26
+ #
27
+ # @param key [Arrow::DataType, Hash, String, Symbol]
28
+ # The key data type of the map data type.
29
+ #
30
+ # You can specify data type as a description by `Hash`.
31
+ #
32
+ # See {Arrow::DataType.resolve} how to specify data type
33
+ # description.
34
+ #
35
+ # @param item [Arrow::DataType, Hash, String, Symbol]
36
+ # The item data type of the map data type.
37
+ #
38
+ # You can specify data type as a description by `Hash`.
39
+ #
40
+ # See {Arrow::DataType.resolve} how to specify data type
41
+ # description.
42
+ #
43
+ # @example Create a map data type for `{0: "Hello", 1: "World"}`
44
+ # key = :int8
45
+ # item = :string
46
+ # Arrow::MapDataType.new(key, item)
47
+ #
48
+ # @overload initialize(description)
49
+ #
50
+ # @param description [Hash] The description of the map data
51
+ # type. It must have `:key`, `:item` values.
52
+ #
53
+ # @option description [Arrow::DataType, Hash, String, Symbol]
54
+ # :key The key data type of the map data type.
55
+ #
56
+ # You can specify data type as a description by `Hash`.
57
+ #
58
+ # See {Arrow::DataType.resolve} how to specify data type
59
+ # description.
60
+ #
61
+ # @option description [Arrow::DataType, Hash, String, Symbol]
62
+ # :item The item data type of the map data type.
63
+ #
64
+ # You can specify data type as a description by `Hash`.
65
+ #
66
+ # See {Arrow::DataType.resolve} how to specify data type
67
+ # description.
68
+ #
69
+ # @example Create a map data type for `{0: "Hello", 1: "World"}`
70
+ # Arrow::MapDataType.new(key: :int8, item: :string)
71
+ def initialize(*args)
72
+ n_args = args.size
73
+ case n_args
74
+ when 1
75
+ description = args[0]
76
+ key = description[:key]
77
+ item = description[:item]
78
+ when 2
79
+ key, item = args
80
+ else
81
+ message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
82
+ raise ArgumentError, message
83
+ end
84
+ key = DataType.resolve(key)
85
+ item = DataType.resolve(item)
86
+ initialize_raw(key, item)
87
+ end
88
+ end
89
+ end
@@ -24,7 +24,7 @@ module Arrow
24
24
  def extract
25
25
  basename = ::File.basename(@path)
26
26
  components = basename.split(".")
27
- return {} if components.size == 1
27
+ return {} if components.size < 2
28
28
 
29
29
  extension = components.last.downcase
30
30
  if components.size > 2
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchReader
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when ::Array
25
+ return nil if value.empty?
26
+ if value.all? {|v| v.is_a?(RecordBatch)}
27
+ new(value)
28
+ else
29
+ nil
30
+ end
31
+ when RecordBatch
32
+ new([value])
33
+ when Table
34
+ TableBatchReader.new(value)
35
+ else
36
+ nil
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -50,8 +50,6 @@ module Arrow
50
50
  alias_method :size, :n_rows
51
51
  alias_method :length, :n_rows
52
52
 
53
- alias_method :[], :find_column
54
-
55
53
  # Converts the record batch to {Arrow::Table}.
56
54
  #
57
55
  # @return [Arrow::Table]