red-arrow 0.15.1 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +5 -0
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +3 -1
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +2 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +22 -8
@@ -18,20 +18,21 @@
18
18
  module Arrow
19
19
  class Array
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  class << self
23
25
  def new(*args)
26
+ _builder_class = builder_class
27
+ return super if _builder_class.nil?
28
+ return super unless _builder_class.buildable?(args)
29
+ _builder_class.build(*args)
30
+ end
31
+
32
+ def builder_class
24
33
  builder_class_name = "#{name}Builder"
25
- if const_defined?(builder_class_name)
26
- builder_class = const_get(builder_class_name)
27
- if builder_class.buildable?(args)
28
- builder_class.build(*args)
29
- else
30
- super
31
- end
32
- else
33
- super
34
- end
34
+ return nil unless const_defined?(builder_class_name)
35
+ const_get(builder_class_name)
35
36
  end
36
37
  end
37
38
 
@@ -82,5 +83,22 @@ module Arrow
82
83
  def to_a
83
84
  values
84
85
  end
86
+
87
+ alias_method :is_in_raw, :is_in
88
+ def is_in(values)
89
+ case values
90
+ when ::Array
91
+ if self.class.builder_class.buildable?([values])
92
+ values = self.class.new(values)
93
+ else
94
+ values = self.class.new(value_data_type, values)
95
+ end
96
+ is_in_raw(values)
97
+ when ChunkedArray
98
+ is_in_chunked_array(values)
99
+ else
100
+ is_in_raw(values)
101
+ end
102
+ end
85
103
  end
86
104
  end
@@ -16,21 +16,13 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- class BinaryArrayBuilder
20
- def append_values(values, is_valids=nil)
21
- if is_valids
22
- is_valids.each_with_index do |is_valid, i|
23
- if is_valid
24
- append_value(values[i])
25
- else
26
- append_null
27
- end
28
- end
29
- else
30
- values.each do |value|
31
- append_value(value)
32
- end
33
- end
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
34
26
  end
35
27
  end
36
28
  end
@@ -18,6 +18,8 @@
18
18
  module Arrow
19
19
  class ChunkedArray
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  alias_method :size, :n_rows
23
25
  unless method_defined?(:length)
@@ -30,6 +30,9 @@ module Arrow
30
30
  def initialize(path_or_data, **options)
31
31
  @path_or_data = path_or_data
32
32
  @options = options
33
+ if @options.key?(:delimiter)
34
+ @options[:col_sep] = @options.delete(:delimiter)
35
+ end
33
36
  @compression = @options.delete(:compression)
34
37
  end
35
38
 
@@ -113,6 +116,8 @@ module Arrow
113
116
  options.add_schema(value)
114
117
  when :encoding
115
118
  # process encoding on opening input
119
+ when :col_sep
120
+ options.delimiter = value
116
121
  else
117
122
  setter = "#{key}="
118
123
  if options.respond_to?(setter)
@@ -21,5 +21,23 @@ module Arrow
21
21
  def add_column_type(name, type)
22
22
  add_column_type_raw(name, DataType.resolve(type))
23
23
  end
24
+
25
+ alias_method :delimiter_raw, :delimiter
26
+ def delimiter
27
+ delimiter_raw.chr
28
+ end
29
+
30
+ alias_method :delimiter_raw=, :delimiter=
31
+ def delimiter=(delimiter)
32
+ case delimiter
33
+ when String
34
+ if delimiter.bytesize != 1
35
+ message = "delimiter must be 1 byte character: #{delimiter.inspect}"
36
+ raise ArgumentError, message
37
+ end
38
+ delimiter = delimiter.ord
39
+ end
40
+ self.delimiter_raw = delimiter
41
+ end
24
42
  end
25
43
  end
@@ -121,6 +121,26 @@ module Arrow
121
121
  end
122
122
  end
123
123
 
124
+ def sub_types
125
+ types = {}
126
+ gtype.children.each do |child|
127
+ sub_type = child.to_class
128
+ types[sub_type] = true
129
+ sub_type.sub_types.each do |sub_sub_type|
130
+ types[sub_sub_type] = true
131
+ end
132
+ end
133
+ types.keys
134
+ end
135
+
136
+ def try_convert(value)
137
+ begin
138
+ resolve(value)
139
+ rescue ArgumentError
140
+ nil
141
+ end
142
+ end
143
+
124
144
  private
125
145
  def resolve_class(data_type)
126
146
  components = data_type.to_s.split("_").collect(&:capitalize)
@@ -137,11 +157,24 @@ module Arrow
137
157
  available_types << components.collect(&:downcase).join("_").to_sym
138
158
  end
139
159
  message =
140
- "unknown type: #{data_type.inspect}: " +
160
+ "unknown type: <#{data_type.inspect}>: " +
141
161
  "available types: #{available_types.inspect}"
142
162
  raise ArgumentError, message
143
163
  end
144
- Arrow.const_get(data_type_class_name)
164
+ data_type_class = Arrow.const_get(data_type_class_name)
165
+ if data_type_class.gtype.abstract?
166
+ not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
167
+ not sub_type.gtype.abstract?
168
+ end
169
+ not_abstract_types = not_abstract_types.sort_by do |type|
170
+ type.name
171
+ end
172
+ message =
173
+ "abstract type: <#{data_type.inspect}>: " +
174
+ "use one of not abstract type: #{not_abstract_types.inspect}"
175
+ raise ArgumentError, message
176
+ end
177
+ data_type_class
145
178
  end
146
179
  end
147
180
 
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/bigdecimal-extension"
19
-
20
18
  module Arrow
21
19
  class Decimal128ArrayBuilder
22
20
  class << self
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DictionaryArray
20
+ def get_value(i)
21
+ dictionary[indices[i]]
22
+ end
23
+ end
24
+ end
@@ -59,7 +59,7 @@ module Arrow
59
59
  # There is a shortcut for convenience. If field description
60
60
  # doesn't have `:data_type`, all keys except `:name` are
61
61
  # processes as data type description. For example, the
62
- # following field descrptions are the same:
62
+ # following field descriptions are the same:
63
63
  #
64
64
  # ```ruby
65
65
  # {name: "visible", data_type: {type: :boolean}}
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericFilterable
20
+ class << self
21
+ def included(base)
22
+ base.__send__(:alias_method, :filter_raw, :filter)
23
+ base.__send__(:alias_method, :filter, :filter_generic)
24
+ end
25
+ end
26
+
27
+ def filter_generic(filter, options=nil)
28
+ case filter
29
+ when ::Array
30
+ filter_raw(BooleanArray.new(filter), options)
31
+ when ChunkedArray
32
+ if respond_to?(:filter_chunked_array)
33
+ filter_chunked_array(filter, options)
34
+ else
35
+ # TODO: Implement this in C++
36
+ filter_raw(filter.pack, options)
37
+ end
38
+ else
39
+ filter_raw(filter, options)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericTakeable
20
+ class << self
21
+ def included(base)
22
+ base.__send__(:alias_method, :take_raw, :take)
23
+ base.__send__(:alias_method, :take, :take_generic)
24
+ end
25
+ end
26
+
27
+ def take_generic(indices)
28
+ case indices
29
+ when ::Array
30
+ take_raw(IntArrayBuilder.build(indices))
31
+ when ChunkedArray
32
+ take_chunked_array(indices)
33
+ else
34
+ take_raw(indices)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -53,16 +53,66 @@ module Arrow
53
53
  #
54
54
  # @example Create a list data type with field description
55
55
  # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
56
- def initialize(field)
57
- if field.is_a?(Hash) and field.key?(:field)
58
- description = field
59
- field = description[:field]
60
- end
61
- if field.is_a?(Hash)
62
- field_description = field
63
- field = Field.new(field_description)
56
+ #
57
+ # @overload initialize(data_type)
58
+ #
59
+ # @param data_type [Arrow::DataType, String, Symbol,
60
+ # ::Array<String>, ::Array<Symbol>, Hash] The element data
61
+ # type of the list data type. A field is created with the
62
+ # default name `"item"` from the data type automatically.
63
+ #
64
+ # See {Arrow::DataType.resolve} how to specify data type.
65
+ #
66
+ # @example Create a list data type with {Arrow::DataType}
67
+ # Arrow::ListDataType.new(Arrow::BooleanDataType.new)
68
+ #
69
+ # @example Create a list data type with data type name as String
70
+ # Arrow::ListDataType.new("boolean")
71
+ #
72
+ # @example Create a list data type with data type name as Symbol
73
+ # Arrow::ListDataType.new(:boolean)
74
+ #
75
+ # @example Create a list data type with data type as Array
76
+ # Arrow::ListDataType.new([:time32, :milli])
77
+ def initialize(arg)
78
+ data_type = resolve_data_type(arg)
79
+ if data_type
80
+ field = Field.new(default_field_name, data_type)
81
+ else
82
+ field = resolve_field(arg)
64
83
  end
65
84
  initialize_raw(field)
66
85
  end
86
+
87
+ private
88
+ def resolve_data_type(arg)
89
+ case arg
90
+ when DataType, String, Symbol, ::Array
91
+ DataType.resolve(arg)
92
+ when Hash
93
+ return nil if arg[:name]
94
+ return nil unless arg[:type]
95
+ DataType.resolve(arg)
96
+ else
97
+ nil
98
+ end
99
+ end
100
+
101
+ def default_field_name
102
+ "item"
103
+ end
104
+
105
+ def resolve_field(arg)
106
+ if arg.is_a?(Hash) and arg.key?(:field)
107
+ description = arg
108
+ arg = description[:field]
109
+ end
110
+ if arg.is_a?(Hash)
111
+ field_description = arg
112
+ Field.new(field_description)
113
+ else
114
+ arg
115
+ end
116
+ end
67
117
  end
68
118
  end
@@ -32,9 +32,16 @@ module Arrow
32
32
  end
33
33
 
34
34
  def require_libraries
35
+ require "arrow/column-containable"
36
+ require "arrow/field-containable"
37
+ require "arrow/generic-filterable"
38
+ require "arrow/generic-takeable"
39
+ require "arrow/record-containable"
40
+
35
41
  require "arrow/array"
36
42
  require "arrow/array-builder"
37
- require "arrow/binary-array-builder"
43
+ require "arrow/bigdecimal-extension"
44
+ require "arrow/buffer"
38
45
  require "arrow/chunked-array"
39
46
  require "arrow/column"
40
47
  require "arrow/compression-type"
@@ -50,17 +57,21 @@ module Arrow
50
57
  require "arrow/decimal128-array-builder"
51
58
  require "arrow/decimal128-data-type"
52
59
  require "arrow/dense-union-data-type"
60
+ require "arrow/dictionary-array"
53
61
  require "arrow/dictionary-data-type"
54
62
  require "arrow/field"
55
63
  require "arrow/file-output-stream"
64
+ require "arrow/group"
56
65
  require "arrow/list-array-builder"
57
66
  require "arrow/list-data-type"
67
+ require "arrow/null-array"
58
68
  require "arrow/null-array-builder"
59
69
  require "arrow/path-extension"
60
70
  require "arrow/record"
61
71
  require "arrow/record-batch"
62
72
  require "arrow/record-batch-builder"
63
73
  require "arrow/record-batch-file-reader"
74
+ require "arrow/record-batch-iterator"
64
75
  require "arrow/record-batch-stream-reader"
65
76
  require "arrow/rolling-window"
66
77
  require "arrow/schema"
@@ -19,7 +19,7 @@ module Arrow
19
19
  class NullArrayBuilder
20
20
  class << self
21
21
  def buildable?(args)
22
- super and args.collect(&:class) != [Integer]
22
+ super and not (args.size == 1 and args[0].is_a?(Integer))
23
23
  end
24
24
  end
25
25
  end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class NullArray
20
+ def get_value(i)
21
+ nil
22
+ end
23
+ end
24
+ end