red-arrow 0.15.1 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04ae0088ccf17ff76c09ad884e9c4ce6c9a9c637c8f724964c24a379e37d652e
4
- data.tar.gz: a3387f67b30dfd07963730ff79c694d703a08f872fdb7990509d019b460e20b9
3
+ metadata.gz: 64b14ef4120f4ab290e8161020902ec2a22631c519d5a133a63ce383610e8545
4
+ data.tar.gz: 2f5850520e2dc69568a454cee0d4246909d52f1d49851221b1b9efd3149bc15c
5
5
  SHA512:
6
- metadata.gz: dc03abf2065a62ffeb9061f64d9e6074dd90dee464bddc4a06773635a727abc025f6b249dad19bcdfa207308ecf24e7a5f8b7e18ec127c897f63d5916a20aa31
7
- data.tar.gz: 7dc98d5e71d23595baffda2b167494677a2d7d2269b010bea23555a6442d848248068f1e87ae3fa79c9fb61957243f038438ae3fa429f4daae3ead429c9066a1
6
+ metadata.gz: 0e19a4da6182437a51f9dad6212436e70b00881674ee6cd29e2a40910fe711fdef4076c81d2778f3ff9e9dd3f45b573c43e90378244cf08d7550b504ad8b53af
7
+ data.tar.gz: 547eb8b31fd59d9c1d5fc1163bb25da70154b797d103d3a01f6fda70dddc0b3c2cdb5391b9006c4510be37b8b75988195f809aaead8c91e7fb68f762cc5313de
@@ -504,14 +504,14 @@ namespace red_arrow {
504
504
  uint8_t compute_child_index(const arrow::UnionArray& array,
505
505
  arrow::UnionType* type,
506
506
  const char* tag) {
507
- const auto type_id = array.raw_type_ids()[index_];
508
- const auto& type_codes = type->type_codes();
509
- for (uint8_t i = 0; i < type_codes.size(); ++i) {
510
- if (type_codes[i] == type_id) {
511
- return i;
507
+ const auto type_code = array.raw_type_codes()[index_];
508
+ if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
+ const auto child_id = type->child_ids()[type_code];
510
+ if (child_id >= 0) {
511
+ return child_id;
512
512
  }
513
513
  }
514
- check_status(arrow::Status::Invalid("Unknown type ID: ", type_id),
514
+ check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
515
515
  tag);
516
516
  return 0;
517
517
  }
@@ -26,60 +26,13 @@ module Arrow
26
26
  return builder.build(values)
27
27
  end
28
28
 
29
- builder_class = nil
30
- builder_class_arguments = []
29
+ builder_info = nil
31
30
  values.each do |value|
32
- case value
33
- when nil
34
- # Ignore
35
- when true, false
36
- return BooleanArray.new(values)
37
- when String
38
- return StringArray.new(values)
39
- when Float
40
- return DoubleArray.new(values)
41
- when Integer
42
- if value < 0
43
- builder = IntArrayBuilder.new
44
- return builder.build(values)
45
- else
46
- builder_class = UIntArrayBuilder
47
- builder_class_arguments = []
48
- end
49
- when Time
50
- data_type = value.data_type
51
- case data_type.unit
52
- when TimeUnit::SECOND
53
- if builder.nil?
54
- builder = Time32ArrayBuilder
55
- builder_class_arguments = [data_type]
56
- end
57
- when TimeUnit::MILLI
58
- if builder != Time64ArrayBuilder
59
- builder = Time32ArrayBuilder
60
- builder_class_arguments = [data_type]
61
- end
62
- when TimeUnit::MICRO
63
- builder = Time64ArrayBuilder
64
- builder_class_arguments = [data_type]
65
- when TimeUnit::NANO
66
- builder = Time64ArrayBuilder.new(data_type)
67
- return builder.build(values)
68
- end
69
- when ::Time
70
- data_type = TimestampDataType.new(:nano)
71
- builder = TimestampArrayBuilder.new(data_type)
72
- return builder.build(values)
73
- when DateTime
74
- return Date64Array.new(values)
75
- when Date
76
- return Date32Array.new(values)
77
- else
78
- return StringArray.new(values)
79
- end
31
+ builder_info = detect_builder_info(value, builder_info)
32
+ break if builder_info and builder_info[:detected]
80
33
  end
81
- if builder_class
82
- builder = builder_class.new(*builder_class_arguments)
34
+ if builder_info
35
+ builder = builder_info[:builder]
83
36
  builder.build(values)
84
37
  else
85
38
  Arrow::StringArray.new(values)
@@ -89,6 +42,102 @@ module Arrow
89
42
  def buildable?(args)
90
43
  args.size == method(:build).arity
91
44
  end
45
+
46
+ private
47
+ def detect_builder_info(value, builder_info)
48
+ case value
49
+ when nil
50
+ builder_info
51
+ when true, false
52
+ {
53
+ builder: BooleanArrayBuilder.new,
54
+ detected: true,
55
+ }
56
+ when String
57
+ {
58
+ builder: StringArrayBuilder.new,
59
+ detected: true,
60
+ }
61
+ when Float
62
+ {
63
+ builder: DoubleArrayBuilder.new,
64
+ detected: true,
65
+ }
66
+ when Integer
67
+ if value < 0
68
+ {
69
+ builder: IntArrayBuilder.new,
70
+ detected: true,
71
+ }
72
+ else
73
+ {
74
+ builder: UIntArrayBuilder.new,
75
+ }
76
+ end
77
+ when Time
78
+ data_type = value.data_type
79
+ case data_type.unit
80
+ when TimeUnit::SECOND
81
+ builder_info || {
82
+ builder: Time32ArrayBuilder.new(data_type)
83
+ }
84
+ when TimeUnit::MILLI
85
+ if builder_info and builder_info[:builder].is_a?(Time64ArrayBuilder)
86
+ builder_info
87
+ else
88
+ {
89
+ builder: Time32ArrayBuilder.new(data_type),
90
+ }
91
+ end
92
+ when TimeUnit::MICRO
93
+ {
94
+ builder: Time64ArrayBuilder.new(data_type),
95
+ }
96
+ when TimeUnit::NANO
97
+ {
98
+ builder: Time64ArrayBuilder.new(data_type),
99
+ detected: true
100
+ }
101
+ end
102
+ when ::Time
103
+ data_type = TimestampDataType.new(:nano)
104
+ {
105
+ builder: TimestampArrayBuilder.new(data_type),
106
+ detected: true,
107
+ }
108
+ when DateTime
109
+ {
110
+ builder: Date64ArrayBuilder.new,
111
+ detected: true,
112
+ }
113
+ when Date
114
+ {
115
+ builder: Date32ArrayBuilder.new,
116
+ detected: true,
117
+ }
118
+ when ::Array
119
+ sub_builder_info = nil
120
+ value.each do |sub_value|
121
+ sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
122
+ break if sub_builder_info and sub_builder_info[:detected]
123
+ end
124
+ if sub_builder_info and sub_builder_info[:detected]
125
+ sub_value_data_type = sub_builder_info[:builder].value_data_type
126
+ field = Field.new("item", sub_value_data_type)
127
+ {
128
+ builder: ListArrayBuilder.new(ListDataType.new(field)),
129
+ detected: true,
130
+ }
131
+ else
132
+ builder_info
133
+ end
134
+ else
135
+ {
136
+ builder: StringArrayBuilder.new,
137
+ detected: true,
138
+ }
139
+ end
140
+ end
92
141
  end
93
142
 
94
143
  def build(values)
data/lib/arrow/array.rb CHANGED
@@ -18,20 +18,21 @@
18
18
  module Arrow
19
19
  class Array
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  class << self
23
25
  def new(*args)
26
+ _builder_class = builder_class
27
+ return super if _builder_class.nil?
28
+ return super unless _builder_class.buildable?(args)
29
+ _builder_class.build(*args)
30
+ end
31
+
32
+ def builder_class
24
33
  builder_class_name = "#{name}Builder"
25
- if const_defined?(builder_class_name)
26
- builder_class = const_get(builder_class_name)
27
- if builder_class.buildable?(args)
28
- builder_class.build(*args)
29
- else
30
- super
31
- end
32
- else
33
- super
34
- end
34
+ return nil unless const_defined?(builder_class_name)
35
+ const_get(builder_class_name)
35
36
  end
36
37
  end
37
38
 
@@ -82,5 +83,22 @@ module Arrow
82
83
  def to_a
83
84
  values
84
85
  end
86
+
87
+ alias_method :is_in_raw, :is_in
88
+ def is_in(values)
89
+ case values
90
+ when ::Array
91
+ if self.class.builder_class.buildable?([values])
92
+ values = self.class.new(values)
93
+ else
94
+ values = self.class.new(value_data_type, values)
95
+ end
96
+ is_in_raw(values)
97
+ when ChunkedArray
98
+ is_in_chunked_array(values)
99
+ else
100
+ is_in_raw(values)
101
+ end
102
+ end
85
103
  end
86
104
  end
@@ -18,6 +18,8 @@
18
18
  module Arrow
19
19
  class ChunkedArray
20
20
  include Enumerable
21
+ include GenericFilterable
22
+ include GenericTakeable
21
23
 
22
24
  alias_method :size, :n_rows
23
25
  unless method_defined?(:length)
@@ -30,6 +30,9 @@ module Arrow
30
30
  def initialize(path_or_data, **options)
31
31
  @path_or_data = path_or_data
32
32
  @options = options
33
+ if @options.key?(:delimiter)
34
+ @options[:col_sep] = @options.delete(:delimiter)
35
+ end
33
36
  @compression = @options.delete(:compression)
34
37
  end
35
38
 
@@ -113,6 +116,8 @@ module Arrow
113
116
  options.add_schema(value)
114
117
  when :encoding
115
118
  # process encoding on opening input
119
+ when :col_sep
120
+ options.delimiter = value
116
121
  else
117
122
  setter = "#{key}="
118
123
  if options.respond_to?(setter)
@@ -21,5 +21,23 @@ module Arrow
21
21
  def add_column_type(name, type)
22
22
  add_column_type_raw(name, DataType.resolve(type))
23
23
  end
24
+
25
+ alias_method :delimiter_raw, :delimiter
26
+ def delimiter
27
+ delimiter_raw.chr
28
+ end
29
+
30
+ alias_method :delimiter_raw=, :delimiter=
31
+ def delimiter=(delimiter)
32
+ case delimiter
33
+ when String
34
+ if delimiter.bytesize != 1
35
+ message = "delimiter must be 1 byte character: #{delimiter.inspect}"
36
+ raise ArgumentError, message
37
+ end
38
+ delimiter = delimiter.ord
39
+ end
40
+ self.delimiter_raw = delimiter
41
+ end
24
42
  end
25
43
  end
@@ -121,6 +121,26 @@ module Arrow
121
121
  end
122
122
  end
123
123
 
124
+ def sub_types
125
+ types = {}
126
+ gtype.children.each do |child|
127
+ sub_type = child.to_class
128
+ types[sub_type] = true
129
+ sub_type.sub_types.each do |sub_sub_type|
130
+ types[sub_sub_type] = true
131
+ end
132
+ end
133
+ types.keys
134
+ end
135
+
136
+ def try_convert(value)
137
+ begin
138
+ resolve(value)
139
+ rescue ArgumentError
140
+ nil
141
+ end
142
+ end
143
+
124
144
  private
125
145
  def resolve_class(data_type)
126
146
  components = data_type.to_s.split("_").collect(&:capitalize)
@@ -137,11 +157,24 @@ module Arrow
137
157
  available_types << components.collect(&:downcase).join("_").to_sym
138
158
  end
139
159
  message =
140
- "unknown type: #{data_type.inspect}: " +
160
+ "unknown type: <#{data_type.inspect}>: " +
141
161
  "available types: #{available_types.inspect}"
142
162
  raise ArgumentError, message
143
163
  end
144
- Arrow.const_get(data_type_class_name)
164
+ data_type_class = Arrow.const_get(data_type_class_name)
165
+ if data_type_class.gtype.abstract?
166
+ not_abstract_types = data_type_class.sub_types.find_all do |sub_type|
167
+ not sub_type.gtype.abstract?
168
+ end
169
+ not_abstract_types = not_abstract_types.sort_by do |type|
170
+ type.name
171
+ end
172
+ message =
173
+ "abstract type: <#{data_type.inspect}>: " +
174
+ "use one of not abstract type: #{not_abstract_types.inspect}"
175
+ raise ArgumentError, message
176
+ end
177
+ data_type_class
145
178
  end
146
179
  end
147
180
 
@@ -15,8 +15,6 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "arrow/bigdecimal-extension"
19
-
20
18
  module Arrow
21
19
  class Decimal128ArrayBuilder
22
20
  class << self
data/lib/arrow/field.rb CHANGED
@@ -59,7 +59,7 @@ module Arrow
59
59
  # There is a shortcut for convenience. If field description
60
60
  # doesn't have `:data_type`, all keys except `:name` are
61
61
  # processes as data type description. For example, the
62
- # following field descrptions are the same:
62
+ # following field descriptions are the same:
63
63
  #
64
64
  # ```ruby
65
65
  # {name: "visible", data_type: {type: :boolean}}
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericFilterable
20
+ class << self
21
+ def included(base)
22
+ base.alias_method :filter_raw, :filter
23
+ base.alias_method :filter, :filter_generic
24
+ end
25
+ end
26
+
27
+ def filter_generic(filter)
28
+ case filter
29
+ when ::Array
30
+ filter_raw(BooleanArray.new(filter))
31
+ when ChunkedArray
32
+ if respond_to?(:filter_chunked_array)
33
+ filter_chunked_array(filter)
34
+ else
35
+ # TODO: Implement this in C++
36
+ filter_raw(filter.pack)
37
+ end
38
+ else
39
+ filter_raw(filter)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module GenericTakeable
20
+ class << self
21
+ def included(base)
22
+ base.alias_method :take_raw, :take
23
+ base.alias_method :take, :take_generic
24
+ end
25
+ end
26
+
27
+ def take_generic(indices)
28
+ case indices
29
+ when ::Array
30
+ take_raw(IntArrayBuilder.build(indices))
31
+ when ChunkedArray
32
+ take_chunked_array(indices)
33
+ else
34
+ take_raw(indices)
35
+ end
36
+ end
37
+ end
38
+ end