red-arrow 0.15.1 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04ae0088ccf17ff76c09ad884e9c4ce6c9a9c637c8f724964c24a379e37d652e
4
- data.tar.gz: a3387f67b30dfd07963730ff79c694d703a08f872fdb7990509d019b460e20b9
3
+ metadata.gz: 64b14ef4120f4ab290e8161020902ec2a22631c519d5a133a63ce383610e8545
4
+ data.tar.gz: 2f5850520e2dc69568a454cee0d4246909d52f1d49851221b1b9efd3149bc15c
5
5
  SHA512:
6
- metadata.gz: dc03abf2065a62ffeb9061f64d9e6074dd90dee464bddc4a06773635a727abc025f6b249dad19bcdfa207308ecf24e7a5f8b7e18ec127c897f63d5916a20aa31
7
- data.tar.gz: 7dc98d5e71d23595baffda2b167494677a2d7d2269b010bea23555a6442d848248068f1e87ae3fa79c9fb61957243f038438ae3fa429f4daae3ead429c9066a1
6
+ metadata.gz: 0e19a4da6182437a51f9dad6212436e70b00881674ee6cd29e2a40910fe711fdef4076c81d2778f3ff9e9dd3f45b573c43e90378244cf08d7550b504ad8b53af
7
+ data.tar.gz: 547eb8b31fd59d9c1d5fc1163bb25da70154b797d103d3a01f6fda70dddc0b3c2cdb5391b9006c4510be37b8b75988195f809aaead8c91e7fb68f762cc5313de
@@ -504,14 +504,14 @@ namespace red_arrow {
504
504
  uint8_t compute_child_index(const arrow::UnionArray& array,
505
505
  arrow::UnionType* type,
506
506
  const char* tag) {
507
- const auto type_id = array.raw_type_ids()[index_];
508
- const auto& type_codes = type->type_codes();
509
- for (uint8_t i = 0; i < type_codes.size(); ++i) {
510
- if (type_codes[i] == type_id) {
511
- return i;
507
+ const auto type_code = array.raw_type_codes()[index_];
508
+ if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
+ const auto child_id = type->child_ids()[type_code];
510
+ if (child_id >= 0) {
511
+ return child_id;
512
512
  }
513
513
  }
514
- check_status(arrow::Status::Invalid("Unknown type ID: ", type_id),
514
+ check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
515
515
  tag);
516
516
  return 0;
517
517
  }
@@ -26,60 +26,13 @@ module Arrow
26
26
  return builder.build(values)
27
27
  end
28
28
 
29
- builder_class = nil
30
- builder_class_arguments = []
29
+ builder_info = nil
31
30
  values.each do |value|
32
- case value
33
- when nil
34
- # Ignore
35
- when true, false
36
- return BooleanArray.new(values)
37
- when String
38
- return StringArray.new(values)
39
- when Float
40
- return DoubleArray.new(values)
41
- when Integer
42
- if value < 0
43
- builder = IntArrayBuilder.new
44
- return builder.build(values)
45
- else
46
- builder_class = UIntArrayBuilder
47
- builder_class_arguments = []
48
- end
49
- when Time
50
- data_type = value.data_type
51
- case data_type.unit
52
- when TimeUnit::SECOND
53
- if builder.nil?
54
- builder = Time32ArrayBuilder
55
- builder_class_arguments = [data_type]
56
- end
57
- when TimeUnit::MILLI
58
- if builder != Time64ArrayBuilder
59
- builder = Time32ArrayBuilder
60
- builder_class_arguments = [data_type]
61
- end
62
- when TimeUnit::MICRO
63
- builder = Time64ArrayBuilder
64
- builder_class_arguments = [data_type]
65
- when TimeUnit::NANO
66
- builder = Time64ArrayBuilder.new(data_type)
67
- return builder.build(values)
68
- end
69
- when ::Time
70
- data_type = TimestampDataType.new(:nano)
71
- builder = TimestampArrayBuilder.new(data_type)
72
- return builder.build(values)
73
- when DateTime
74
- return Date64Array.new(values)
75
- when Date
76
- return Date32Array.new(values)
77
- else
78
- return StringArray.new(values)
79
- end
31
+ builder_info = detect_builder_info(value, builder_info)
32
+ break if builder_info and builder_info[:detected]
80
33
  end
81
- if builder_class
82
- builder = builder_class.new(*builder_class_arguments)
34
+ if builder_info
35
+ builder = builder_info[:builder]
83
36
  builder.build(values)
84
37
  else
85
38
  Arrow::StringArray.new(values)
@@ -89,6 +42,102 @@ module Arrow
89
42
  def buildable?(args)
90
43
  args.size == method(:build).arity
91
44
  end
45
+
46
+ private
47
+ def detect_builder_info(value, builder_info)
48
+ case value
49
+ when nil
50
+ builder_info
51
+ when true, false
52
+ {
53
+ builder: BooleanArrayBuilder.new,
54
+ detected: true,
55
+ }
56
+ when String
57
+ {
58
+ builder: StringArrayBuilder.new,
59
+ detected: true,
60
+ }
61
+ when Float
62
+ {
63
+ builder: DoubleArrayBuilder.new,
64
+ detected: true,
65
+ }
66
+ when Integer
67
+ if value < 0
68
+ {
69
+ builder: IntArrayBuilder.new,
70
+ detected: true,
71
+ }
72
+ else
73
+ {
74
+ builder: UIntArrayBuilder.new,
75
+ }
76
+ end
77
+ when Time
78
+ data_type = value.data_type
79
+ case data_type.unit
80
+ when TimeUnit::SECOND
81
+ builder_info || {
82
+ builder: Time32ArrayBuilder.new(data_type)
83
+ }
84
+ when TimeUnit::MILLI
85
+ if builder_info and builder_info[:builder].is_a?(Time64ArrayBuilder)
86
+ builder_info
87
+ else
88
+ {
89
+ builder: Time32ArrayBuilder.new(data_type),
90
+ }
91
+ end
92
+ when TimeUnit::MICRO
93
+ {
94
+ builder: Time64ArrayBuilder.new(data_type),
95
+ }
96
+ when TimeUnit::NANO
97
+ {
98
+ builder: Time64ArrayBuilder.new(data_type),
99
+ detected: true
100
+ }
101
+ end
102
+ when ::Time
103
+ data_type = TimestampDataType.new(:nano)
104
+ {
105
+ builder: TimestampArrayBuilder.new(data_type),
106
+ detected: true,
107
+ }
108
+ when DateTime
109
+ {
110
+ builder: Date64ArrayBuilder.new,
111
+ detected: true,
112
+ }
113
+ when Date
114
+ {
115
+ builder: Date32ArrayBuilder.new,
116
+ detected: true,
117
+ }
118
+ when ::Array
119
+ sub_builder_info = nil
120
+ value.each do |sub_value|
121
+ sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
122
+ break if sub_builder_info and sub_builder_info[:detected]
123
+ end
124
+ if sub_builder_info and sub_builder_info[:detected]
125
+ sub_value_data_type = sub_builder_info[:builder].value_data_type
126
+ field = Field.new("item", sub_value_data_type)
127
+ {
128
+ builder: ListArrayBuilder.new(ListDataType.new(field)),
129
+ detected: true,
130
+ }
131
+ else
132
+ builder_info
133
+ end
134
+ else
135
+ {
136
+ builder: StringArrayBuilder.new,
137
+ detected: true,
138
+ }
139
+ end
140
+ end
92
141
  end
93
142
 
94
143
  def build(values)
data/lib/arrow/array.rb CHANGED
@@ -18,20 +18,21 @@
18
18
  module Arrow
19
19
  class Array
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  class << self
23
25
  def new(*args)
26
+ _builder_class = builder_class
27
+ return super if _builder_class.nil?
28
+ return super unless _builder_class.buildable?(args)
29
+ _builder_class.build(*args)
30
+ end
31
+
32
+ def builder_class
24
33
  builder_class_name = "#{name}Builder"
25
- if const_defined?(builder_class_name)
26
- builder_class = const_get(builder_class_name)
27
- if builder_class.buildable?(args)
28
- builder_class.build(*args)
29
- else
30
- super
31
- end
32
- else
33
- super
34
- end
34
+ return nil unless const_defined?(builder_class_name)
35
+ const_get(builder_class_name)
35
36
  end
36
37
  end
37
38
 
@@ -82,5 +83,22 @@ module Arrow
82
83
  def to_a
83
84
  values
84
85
  end
86
+
87
+ alias_method :is_in_raw, :is_in
88
+ def is_in(values)
89
+ case values
90
+ when ::Array
91
+ if self.class.builder_class.buildable?([values])
92
+ values = self.class.new(values)
93
+ else
94
+ values = self.class.new(value_data_type, values)
95
+ end
96
+ is_in_raw(values)
97
+ when ChunkedArray
98
+ is_in_chunked_array(values)
99
+ else
100
+ is_in_raw(values)
101
+ end
102
+ end
85
103
  end
86
104
  end
@@ -18,6 +18,8 @@
18
18
  module Arrow
19
19
  class ChunkedArray
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  alias_method :size, :n_rows
23
25
  unless method_defined?(:length)
@@ -30,6 +30,9 @@ module Arrow
30
30
  def initialize(path_or_data, **options)
31
31
  @path_or_data = path_or_data
32
32
  @options = options
33
+ if @options.key?(:delimiter)
34
+ @options[:col_sep] = @options.delete(:delimiter)
35
+ end
33
36
  @compression = @options.delete(:compression)
34
37
  end
35
38
 
@@ -113,6 +116,8 @@ module Arrow
113
116
  options.add_schema(value)
114
117
  when :encoding
115
118
  # process encoding on opening input
119
+ when :col_sep
120
+ options.delimiter = value
116
121
  else
117
122
  setter = "#{key}="
118
123
  if options.respond_to?(setter)
@@ -21,5 +21,23 @@ module Arrow
21
21
  def add_column_type(name, type)
22
22
  add_column_type_raw(name, DataType.resolve(type))
23
23
  end
24
+
25
+ alias_method :delimiter_raw, :delimiter
26
+ def delimiter
27
+ delimiter_raw.chr
28
+ end
29
+
30
+ alias_method :delimiter_raw=, :delimiter=
31
+ def delimiter=(delimiter)
32
+ case delimiter
33
+ when String
34
+ if delimiter.bytesize != 1
35
+ message = "delimiter must be 1 byte character: #{delimiter.inspect}"
36
+ raise ArgumentError, message
37
+ end
38
+ delimiter = delimiter.ord
39
+ end
40
+ self.delimiter_raw = delimiter
41
+ end
24
42
  end
25
43
  end
@@ -121,6 +121,26 @@ module Arrow
121
121
  end
122
122
  end
123
123
 
124
+ def sub_types
125
+ types = {}
126
+ gtype.children.each do |child|
127
+ sub_type = child.to_class
128
+ types[sub_type] = true
129
+ sub_type.sub_types.each do |sub_sub_type|
130
+ types[sub_sub_type] = true
131
+ end
132
+ end
133
+ types.keys
134
+ end
135
+
136
+ def try_convert(value)
137
+ begin
138
+ resolve(value)
139
+ rescue ArgumentError
140
+ nil
141
+ end
142
+ end
143
+
124
144
  private
125
145
  def resolve_class(data_type)
126
146
  components = data_type.to_s.split("_").collect(&:capitalize)
@@ -137,11 +157,24 @@ module Arrow
137
157
  available_types << components.collect(&:downcase).join("_").to_sym
138
158
  end
139
159
  message =
140
- "unknown type: #{data_type.inspect}: " +
160
+ "unknown type: <#{data_type.inspect}>: " +
141
161
  "available types: #{available_types.inspect}"
142
162
  raise ArgumentError, message
143
163
  end
144
- Arrow.const_get(data_type_class_name)
164
+ data_type_class = Arrow.const_get(data_type_class_name)
165
+ if data_type_class.gtype.abstract?
166
+ not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
167
+ not sub_type.gtype.abstract?
168
+ end
169
+ not_abstract_types = not_abstract_types.sort_by do |type|
170
+ type.name
171
+ end
172
+ message =
173
+ "abstract type: <#{data_type.inspect}>: " +
174
+ "use one of not abstract type: #{not_abstract_types.inspect}"
175
+ raise ArgumentError, message
176
+ end
177
+ data_type_class
145
178
  end
146
179
  end
147
180
 
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/bigdecimal-extension"
19
-
20
18
  module Arrow
21
19
  class Decimal128ArrayBuilder
22
20
  class << self
data/lib/arrow/field.rb CHANGED
@@ -59,7 +59,7 @@ module Arrow
59
59
  # There is a shortcut for convenience. If field description
60
60
  # doesn't have `:data_type`, all keys except `:name` are
61
61
  # processes as data type description. For example, the
62
- # following field descrptions are the same:
62
+ # following field descriptions are the same:
63
63
  #
64
64
  # ```ruby
65
65
  # {name: "visible", data_type: {type: :boolean}}
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericFilterable
20
+ class << self
21
+ def included(base)
22
+ base.alias_method :filter_raw, :filter
23
+ base.alias_method :filter, :filter_generic
24
+ end
25
+ end
26
+
27
+ def filter_generic(filter)
28
+ case filter
29
+ when ::Array
30
+ filter_raw(BooleanArray.new(filter))
31
+ when ChunkedArray
32
+ if respond_to?(:filter_chunked_array)
33
+ filter_chunked_array(filter)
34
+ else
35
+ # TODO: Implement this in C++
36
+ filter_raw(filter.pack)
37
+ end
38
+ else
39
+ filter_raw(filter)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericTakeable
20
+ class << self
21
+ def included(base)
22
+ base.alias_method :take_raw, :take
23
+ base.alias_method :take, :take_generic
24
+ end
25
+ end
26
+
27
+ def take_generic(indices)
28
+ case indices
29
+ when ::Array
30
+ take_raw(IntArrayBuilder.build(indices))
31
+ when ChunkedArray
32
+ take_chunked_array(indices)
33
+ else
34
+ take_raw(indices)
35
+ end
36
+ end
37
+ end
38
+ end