red-arrow 0.15.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/Rakefile +28 -16
  3. data/ext/arrow/converters.hpp +63 -33
  4. data/ext/arrow/raw-records.cpp +2 -1
  5. data/ext/arrow/values.cpp +2 -1
  6. data/lib/arrow/array-builder.rb +101 -52
  7. data/lib/arrow/array.rb +28 -10
  8. data/lib/arrow/{binary-array-builder.rb → buffer.rb} +7 -15
  9. data/lib/arrow/chunked-array.rb +2 -0
  10. data/lib/arrow/csv-loader.rb +15 -3
  11. data/lib/arrow/csv-read-options.rb +18 -0
  12. data/lib/arrow/data-type.rb +35 -2
  13. data/lib/arrow/decimal128-array-builder.rb +0 -2
  14. data/lib/arrow/dictionary-array.rb +24 -0
  15. data/lib/arrow/field.rb +1 -1
  16. data/lib/arrow/generic-filterable.rb +43 -0
  17. data/lib/arrow/generic-takeable.rb +38 -0
  18. data/lib/arrow/list-data-type.rb +58 -8
  19. data/lib/arrow/loader.rb +12 -1
  20. data/lib/arrow/null-array-builder.rb +1 -1
  21. data/lib/arrow/null-array.rb +24 -0
  22. data/lib/arrow/raw-table-converter.rb +47 -0
  23. data/lib/arrow/record-batch-iterator.rb +22 -0
  24. data/lib/arrow/record-batch.rb +8 -3
  25. data/lib/arrow/schema.rb +5 -2
  26. data/lib/arrow/struct-array-builder.rb +13 -7
  27. data/lib/arrow/struct-data-type.rb +0 -2
  28. data/lib/arrow/table-loader.rb +29 -6
  29. data/lib/arrow/table-saver.rb +37 -13
  30. data/lib/arrow/table.rb +20 -73
  31. data/lib/arrow/version.rb +1 -1
  32. data/red-arrow.gemspec +4 -2
  33. data/test/helper.rb +1 -0
  34. data/test/helper/omittable.rb +36 -0
  35. data/test/raw-records/test-dense-union-array.rb +1 -34
  36. data/test/raw-records/test-sparse-union-array.rb +1 -33
  37. data/test/run-test.rb +14 -3
  38. data/test/test-array-builder.rb +17 -0
  39. data/test/test-array.rb +104 -0
  40. data/test/test-buffer.rb +11 -0
  41. data/test/test-chunked-array.rb +96 -0
  42. data/test/test-csv-loader.rb +77 -2
  43. data/test/test-data-type.rb +11 -0
  44. data/test/test-dense-union-data-type.rb +2 -2
  45. data/test/test-dictionary-array.rb +41 -0
  46. data/test/test-feather.rb +21 -6
  47. data/test/test-list-data-type.rb +27 -1
  48. data/test/test-null-array.rb +23 -0
  49. data/test/test-record-batch-iterator.rb +37 -0
  50. data/test/test-record-batch.rb +14 -0
  51. data/test/test-schema.rb +16 -0
  52. data/test/test-slicer.rb +74 -30
  53. data/test/test-sparse-union-data-type.rb +2 -2
  54. data/test/test-struct-array-builder.rb +8 -4
  55. data/test/test-table.rb +153 -14
  56. data/test/test-timestamp-array.rb +19 -0
  57. data/test/values/test-dense-union-array.rb +1 -34
  58. data/test/values/test-sparse-union-array.rb +1 -33
  59. metadata +76 -63
@@ -18,20 +18,21 @@
18
18
  module Arrow
19
19
  class Array
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  class << self
23
25
  def new(*args)
26
+ _builder_class = builder_class
27
+ return super if _builder_class.nil?
28
+ return super unless _builder_class.buildable?(args)
29
+ _builder_class.build(*args)
30
+ end
31
+
32
+ def builder_class
24
33
  builder_class_name = "#{name}Builder"
25
- if const_defined?(builder_class_name)
26
- builder_class = const_get(builder_class_name)
27
- if builder_class.buildable?(args)
28
- builder_class.build(*args)
29
- else
30
- super
31
- end
32
- else
33
- super
34
- end
34
+ return nil unless const_defined?(builder_class_name)
35
+ const_get(builder_class_name)
35
36
  end
36
37
  end
37
38
 
@@ -82,5 +83,22 @@ module Arrow
82
83
  def to_a
83
84
  values
84
85
  end
86
+
87
+ alias_method :is_in_raw, :is_in
88
+ def is_in(values)
89
+ case values
90
+ when ::Array
91
+ if self.class.builder_class.buildable?([values])
92
+ values = self.class.new(values)
93
+ else
94
+ values = self.class.new(value_data_type, values)
95
+ end
96
+ is_in_raw(values)
97
+ when ChunkedArray
98
+ is_in_chunked_array(values)
99
+ else
100
+ is_in_raw(values)
101
+ end
102
+ end
85
103
  end
86
104
  end
@@ -16,21 +16,13 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- class BinaryArrayBuilder
20
- def append_values(values, is_valids=nil)
21
- if is_valids
22
- is_valids.each_with_index do |is_valid, i|
23
- if is_valid
24
- append_value(values[i])
25
- else
26
- append_null
27
- end
28
- end
29
- else
30
- values.each do |value|
31
- append_value(value)
32
- end
33
- end
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
34
26
  end
35
27
  end
36
28
  end
@@ -18,6 +18,8 @@
18
18
  module Arrow
19
19
  class ChunkedArray
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  alias_method :size, :n_rows
23
25
  unless method_defined?(:length)
@@ -30,6 +30,9 @@ module Arrow
30
30
  def initialize(path_or_data, **options)
31
31
  @path_or_data = path_or_data
32
32
  @options = options
33
+ if @options.key?(:delimiter)
34
+ @options[:col_sep] = @options.delete(:delimiter)
35
+ end
33
36
  @compression = @options.delete(:compression)
34
37
  end
35
38
 
@@ -93,10 +96,17 @@ module Arrow
93
96
  @options.each do |key, value|
94
97
  case key
95
98
  when :headers
96
- if value
97
- options.n_header_rows = 1
99
+ case value
100
+ when ::Array
101
+ options.column_names = value
102
+ when String
103
+ return nil
98
104
  else
99
- options.n_header_rows = 0
105
+ if value
106
+ options.generate_column_names = false
107
+ else
108
+ options.generate_column_names = true
109
+ end
100
110
  end
101
111
  when :column_types
102
112
  value.each do |name, type|
@@ -106,6 +116,8 @@ module Arrow
106
116
  options.add_schema(value)
107
117
  when :encoding
108
118
  # process encoding on opening input
119
+ when :col_sep
120
+ options.delimiter = value
109
121
  else
110
122
  setter = "#{key}="
111
123
  if options.respond_to?(setter)
@@ -21,5 +21,23 @@ module Arrow
21
21
  def add_column_type(name, type)
22
22
  add_column_type_raw(name, DataType.resolve(type))
23
23
  end
24
+
25
+ alias_method :delimiter_raw, :delimiter
26
+ def delimiter
27
+ delimiter_raw.chr
28
+ end
29
+
30
+ alias_method :delimiter_raw=, :delimiter=
31
+ def delimiter=(delimiter)
32
+ case delimiter
33
+ when String
34
+ if delimiter.bytesize != 1
35
+ message = "delimiter must be 1 byte character: #{delimiter.inspect}"
36
+ raise ArgumentError, message
37
+ end
38
+ delimiter = delimiter.ord
39
+ end
40
+ self.delimiter_raw = delimiter
41
+ end
24
42
  end
25
43
  end
@@ -121,6 +121,26 @@ module Arrow
121
121
  end
122
122
  end
123
123
 
124
+ def sub_types
125
+ types = {}
126
+ gtype.children.each do |child|
127
+ sub_type = child.to_class
128
+ types[sub_type] = true
129
+ sub_type.sub_types.each do |sub_sub_type|
130
+ types[sub_sub_type] = true
131
+ end
132
+ end
133
+ types.keys
134
+ end
135
+
136
+ def try_convert(value)
137
+ begin
138
+ resolve(value)
139
+ rescue ArgumentError
140
+ nil
141
+ end
142
+ end
143
+
124
144
  private
125
145
  def resolve_class(data_type)
126
146
  components = data_type.to_s.split("_").collect(&:capitalize)
@@ -137,11 +157,24 @@ module Arrow
137
157
  available_types << components.collect(&:downcase).join("_").to_sym
138
158
  end
139
159
  message =
140
- "unknown type: #{data_type.inspect}: " +
160
+ "unknown type: <#{data_type.inspect}>: " +
141
161
  "available types: #{available_types.inspect}"
142
162
  raise ArgumentError, message
143
163
  end
144
- Arrow.const_get(data_type_class_name)
164
+ data_type_class = Arrow.const_get(data_type_class_name)
165
+ if data_type_class.gtype.abstract?
166
+ not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
167
+ not sub_type.gtype.abstract?
168
+ end
169
+ not_abstract_types = not_abstract_types.sort_by do |type|
170
+ type.name
171
+ end
172
+ message =
173
+ "abstract type: <#{data_type.inspect}>: " +
174
+ "use one of not abstract type: #{not_abstract_types.inspect}"
175
+ raise ArgumentError, message
176
+ end
177
+ data_type_class
145
178
  end
146
179
  end
147
180
 
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/bigdecimal-extension"
19
-
20
18
  module Arrow
21
19
  class Decimal128ArrayBuilder
22
20
  class << self
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DictionaryArray
20
+ def get_value(i)
21
+ dictionary[indices[i]]
22
+ end
23
+ end
24
+ end
@@ -59,7 +59,7 @@ module Arrow
59
59
  # There is a shortcut for convenience. If field description
60
60
  # doesn't have `:data_type`, all keys except `:name` are
61
61
  # processes as data type description. For example, the
62
- # following field descrptions are the same:
62
+ # following field descriptions are the same:
63
63
  #
64
64
  # ```ruby
65
65
  # {name: "visible", data_type: {type: :boolean}}
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericFilterable
20
+ class << self
21
+ def included(base)
22
+ base.__send__(:alias_method, :filter_raw, :filter)
23
+ base.__send__(:alias_method, :filter, :filter_generic)
24
+ end
25
+ end
26
+
27
+ def filter_generic(filter, options=nil)
28
+ case filter
29
+ when ::Array
30
+ filter_raw(BooleanArray.new(filter), options)
31
+ when ChunkedArray
32
+ if respond_to?(:filter_chunked_array)
33
+ filter_chunked_array(filter, options)
34
+ else
35
+ # TODO: Implement this in C++
36
+ filter_raw(filter.pack, options)
37
+ end
38
+ else
39
+ filter_raw(filter, options)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericTakeable
20
+ class << self
21
+ def included(base)
22
+ base.__send__(:alias_method, :take_raw, :take)
23
+ base.__send__(:alias_method, :take, :take_generic)
24
+ end
25
+ end
26
+
27
+ def take_generic(indices)
28
+ case indices
29
+ when ::Array
30
+ take_raw(IntArrayBuilder.build(indices))
31
+ when ChunkedArray
32
+ take_chunked_array(indices)
33
+ else
34
+ take_raw(indices)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -53,16 +53,66 @@ module Arrow
53
53
  #
54
54
  # @example Create a list data type with field description
55
55
  # Arrow::ListDataType.new(field: {name: "visible", type: :boolean})
56
- def initialize(field)
57
- if field.is_a?(Hash) and field.key?(:field)
58
- description = field
59
- field = description[:field]
60
- end
61
- if field.is_a?(Hash)
62
- field_description = field
63
- field = Field.new(field_description)
56
+ #
57
+ # @overload initialize(data_type)
58
+ #
59
+ # @param data_type [Arrow::DataType, String, Symbol,
60
+ # ::Array<String>, ::Array<Symbol>, Hash] The element data
61
+ # type of the list data type. A field is created with the
62
+ # default name `"item"` from the data type automatically.
63
+ #
64
+ # See {Arrow::DataType.resolve} how to specify data type.
65
+ #
66
+ # @example Create a list data type with {Arrow::DataType}
67
+ # Arrow::ListDataType.new(Arrow::BooleanDataType.new)
68
+ #
69
+ # @example Create a list data type with data type name as String
70
+ # Arrow::ListDataType.new("boolean")
71
+ #
72
+ # @example Create a list data type with data type name as Symbol
73
+ # Arrow::ListDataType.new(:boolean)
74
+ #
75
+ # @example Create a list data type with data type as Array
76
+ # Arrow::ListDataType.new([:time32, :milli])
77
+ def initialize(arg)
78
+ data_type = resolve_data_type(arg)
79
+ if data_type
80
+ field = Field.new(default_field_name, data_type)
81
+ else
82
+ field = resolve_field(arg)
64
83
  end
65
84
  initialize_raw(field)
66
85
  end
86
+
87
+ private
88
+ def resolve_data_type(arg)
89
+ case arg
90
+ when DataType, String, Symbol, ::Array
91
+ DataType.resolve(arg)
92
+ when Hash
93
+ return nil if arg[:name]
94
+ return nil unless arg[:type]
95
+ DataType.resolve(arg)
96
+ else
97
+ nil
98
+ end
99
+ end
100
+
101
+ def default_field_name
102
+ "item"
103
+ end
104
+
105
+ def resolve_field(arg)
106
+ if arg.is_a?(Hash) and arg.key?(:field)
107
+ description = arg
108
+ arg = description[:field]
109
+ end
110
+ if arg.is_a?(Hash)
111
+ field_description = arg
112
+ Field.new(field_description)
113
+ else
114
+ arg
115
+ end
116
+ end
67
117
  end
68
118
  end
@@ -32,9 +32,16 @@ module Arrow
32
32
  end
33
33
 
34
34
  def require_libraries
35
+ require "arrow/column-containable"
36
+ require "arrow/field-containable"
37
+ require "arrow/generic-filterable"
38
+ require "arrow/generic-takeable"
39
+ require "arrow/record-containable"
40
+
35
41
  require "arrow/array"
36
42
  require "arrow/array-builder"
37
- require "arrow/binary-array-builder"
43
+ require "arrow/bigdecimal-extension"
44
+ require "arrow/buffer"
38
45
  require "arrow/chunked-array"
39
46
  require "arrow/column"
40
47
  require "arrow/compression-type"
@@ -50,17 +57,21 @@ module Arrow
50
57
  require "arrow/decimal128-array-builder"
51
58
  require "arrow/decimal128-data-type"
52
59
  require "arrow/dense-union-data-type"
60
+ require "arrow/dictionary-array"
53
61
  require "arrow/dictionary-data-type"
54
62
  require "arrow/field"
55
63
  require "arrow/file-output-stream"
64
+ require "arrow/group"
56
65
  require "arrow/list-array-builder"
57
66
  require "arrow/list-data-type"
67
+ require "arrow/null-array"
58
68
  require "arrow/null-array-builder"
59
69
  require "arrow/path-extension"
60
70
  require "arrow/record"
61
71
  require "arrow/record-batch"
62
72
  require "arrow/record-batch-builder"
63
73
  require "arrow/record-batch-file-reader"
74
+ require "arrow/record-batch-iterator"
64
75
  require "arrow/record-batch-stream-reader"
65
76
  require "arrow/rolling-window"
66
77
  require "arrow/schema"