red-arrow 0.12.0 → 0.13.0

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50)
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #pragma once
21
+
22
+ #include <arrow/api.h>
23
+
24
+ #ifdef _WIN32
25
+ # define gmtime_r gmtime_r_ruby_win32
26
+ # define localtime_r localtime_r_ruby_win32
27
+ # include <ruby.h>
28
+ # undef gmtime_r
29
+ # undef localtime_r
30
+ #endif
31
+
32
+ #include <arrow-glib/arrow-glib.hpp>
33
+ #include <rbgobject.h>
34
+
35
+ namespace red_arrow {
36
+ extern VALUE cDate;
37
+
38
+ extern ID id_BigDecimal;
39
+ extern ID id_jd;
40
+ extern ID id_to_datetime;
41
+
42
+ VALUE record_batch_raw_records(VALUE obj);
43
+
44
+ inline VALUE time_unit_to_scale(arrow::TimeUnit::type unit) {
45
+ switch (unit) {
46
+ case arrow::TimeUnit::SECOND:
47
+ return INT2FIX(1);
48
+ case arrow::TimeUnit::MILLI:
49
+ return INT2FIX(1000);
50
+ case arrow::TimeUnit::MICRO:
51
+ return INT2FIX(1000 * 1000);
52
+ case arrow::TimeUnit::NANO:
53
+ // NOTE: INT2FIX works for 1e+9 because: FIXNUM_MAX >= (1<<30) - 1 > 1e+9
54
+ return INT2FIX(1000 * 1000 * 1000);
55
+ default:
56
+ break; // NOT REACHED
57
+ }
58
+ return Qnil;
59
+ }
60
+ }
@@ -15,7 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "gobject-introspection"
18
+ require "extpp/setup"
19
+ require "gio2"
19
20
 
20
21
  require "arrow/version"
21
22
 
@@ -62,6 +62,10 @@ module Arrow
62
62
  Arrow::StringArray.new(values)
63
63
  end
64
64
  end
65
+
66
+ def buildable?(args)
67
+ args.size == method(:build).arity
68
+ end
65
69
  end
66
70
 
67
71
  def build(values)
@@ -24,7 +24,7 @@ module Arrow
24
24
  builder_class_name = "#{name}Builder"
25
25
  if const_defined?(builder_class_name)
26
26
  builder_class = const_get(builder_class_name)
27
- if args.size == builder_class.method(:build).arity
27
+ if builder_class.buildable?(args)
28
28
  builder_class.build(*args)
29
29
  else
30
30
  super
@@ -35,8 +35,18 @@ module Arrow
35
35
  end
36
36
  end
37
37
 
38
+ # @param i [Integer]
39
+ # The index of the value to be gotten.
40
+ #
41
+ # You can specify negative index like for `::Array#[]`.
42
+ #
43
+ # @return [Object, nil]
44
+ # The `i`-th value.
45
+ #
46
+ # `nil` for NULL value or out of range `i`.
38
47
  def [](i)
39
48
  i += length if i < 0
49
+ return nil if i < 0 or i >= length
40
50
  if null?(i)
41
51
  nil
42
52
  else
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ require "bigdecimal"
19
+
20
+ class BigDecimal
21
+ def to_arrow
22
+ Arrow::Decimal128.new(to_s)
23
+ end
24
+ end
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class BinaryArrayBuilder
20
+ def append_values(values, is_valids=nil)
21
+ if is_valids
22
+ is_valids.each_with_index do |is_valid, i|
23
+ if is_valid
24
+ append_value(values[i])
25
+ else
26
+ append_null
27
+ end
28
+ end
29
+ else
30
+ values.each do |value|
31
+ append_value(value)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -24,7 +24,11 @@ module Arrow
24
24
  begin
25
25
  yield(io)
26
26
  ensure
27
- io.close
27
+ if io.respond_to?(:closed?)
28
+ io.close unless io.closed?
29
+ else
30
+ io.close
31
+ end
28
32
  end
29
33
  end
30
34
  end
@@ -104,6 +104,8 @@ module Arrow
104
104
  end
105
105
  when :schema
106
106
  options.add_schema(value)
107
+ when :encoding
108
+ # process encoding on opening input
107
109
  else
108
110
  setter = "#{key}="
109
111
  if options.respond_to?(setter)
@@ -116,7 +118,7 @@ module Arrow
116
118
  options
117
119
  end
118
120
 
119
- def open_input(raw_input)
121
+ def open_decompress_input(raw_input)
120
122
  if @compression
121
123
  codec = Codec.new(@compression)
122
124
  CompressedInputStream.open(codec, raw_input) do |input|
@@ -127,16 +129,36 @@ module Arrow
127
129
  end
128
130
  end
129
131
 
132
+ def open_encoding_convert_stream(raw_input, &block)
133
+ encoding = @options[:encoding]
134
+ if encoding
135
+ converter = Gio::CharsetConverter.new("UTF-8", encoding)
136
+ convert_input_stream =
137
+ Gio::ConverterInputStream.new(raw_input, converter)
138
+ GIOInputStream.open(convert_input_stream, &block)
139
+ else
140
+ yield(raw_input)
141
+ end
142
+ end
143
+
144
+ def wrap_input(raw_input)
145
+ open_decompress_input(raw_input) do |input_|
146
+ open_encoding_convert_stream(input_) do |input__|
147
+ yield(input__)
148
+ end
149
+ end
150
+ end
151
+
130
152
  def load_from_path(path)
131
153
  options = reader_options
132
154
  if options
133
155
  begin
134
- MemoryMappedInputStream.open(path.to_s) do |raw_input|
135
- open_input(raw_input) do |input|
156
+ MemoryMappedInputStream.open(path) do |raw_input|
157
+ wrap_input(raw_input) do |input|
136
158
  return CSVReader.new(input, options).read
137
159
  end
138
160
  end
139
- rescue Arrow::Error::Invalid
161
+ rescue Arrow::Error::Invalid, Gio::Error
140
162
  end
141
163
  end
142
164
 
@@ -151,11 +173,11 @@ module Arrow
151
173
  if options
152
174
  begin
153
175
  BufferInputStream.open(Buffer.new(data)) do |raw_input|
154
- open_input(raw_input) do |input|
176
+ wrap_input(raw_input) do |input|
155
177
  return CSVReader.new(input, options).read
156
178
  end
157
179
  end
158
- rescue Arrow::Error::Invalid
180
+ rescue Arrow::Error::Invalid, Gio::Error
159
181
  end
160
182
  end
161
183
 
@@ -114,14 +114,18 @@ module Arrow
114
114
 
115
115
  private
116
116
  def resolve_class(data_type)
117
- data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt")
117
+ components = data_type.to_s.split("_").collect(&:capitalize)
118
+ data_type_name = components.join.gsub(/\AUint/, "UInt")
118
119
  data_type_class_name = "#{data_type_name}DataType"
119
120
  unless Arrow.const_defined?(data_type_class_name)
120
121
  available_types = []
121
122
  Arrow.constants.each do |name|
122
- if name.to_s.end_with?("DataType")
123
- available_types << name.to_s.gsub(/DataType\z/, "").downcase.to_sym
124
- end
123
+ name = name.to_s
124
+ next if name == "DataType"
125
+ next unless name.end_with?("DataType")
126
+ name = name.gsub(/DataType\z/, "")
127
+ components = name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
128
+ available_types << components.collect(&:downcase).join("_").to_sym
125
129
  end
126
130
  message =
127
131
  "unknown type: #{data_type.inspect}: " +
@@ -15,7 +15,7 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "bigdecimal"
18
+ require "arrow/bigdecimal-extension"
19
19
 
20
20
  module Arrow
21
21
  class Decimal128ArrayBuilder
@@ -36,7 +36,7 @@ module Arrow
36
36
  when Float
37
37
  value = Decimal128.new(value.to_s)
38
38
  when BigDecimal
39
- value = Decimal128.new(value.to_s)
39
+ value = value.to_arrow
40
40
  end
41
41
  append_value_raw(value)
42
42
  end
@@ -0,0 +1,42 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Decimal128
20
+ alias_method :to_s_raw, :to_s
21
+
22
+ # @overload to_s
23
+ #
24
+ # @return [String]
25
+ # The string representation of the decimal.
26
+ #
27
+ # @overload to_s(scale)
28
+ #
29
+ # @param scale [Integer] The scale of the decimal.
30
+ # @return [String]
31
+ # The string representation of the decimal including the scale.
32
+ #
33
+ # @since 0.13.0
34
+ def to_s(scale=nil)
35
+ if scale
36
+ to_string_scale(scale)
37
+ else
38
+ to_s_raw
39
+ end
40
+ end
41
+ end
42
+ end
@@ -56,7 +56,7 @@ module Arrow
56
56
  when ::Array
57
57
  append_value_raw
58
58
  @value_builder ||= value_builder
59
- @value_builder.append_values(value, nil)
59
+ @value_builder.append(*value)
60
60
  else
61
61
  message = "list value must be nil or Array: #{value.inspect}"
62
62
  raise ArgumentError, message
@@ -28,11 +28,13 @@ module Arrow
28
28
  private
29
29
  def post_load(repository, namespace)
30
30
  require_libraries
31
+ require_extension_library
31
32
  end
32
33
 
33
34
  def require_libraries
34
35
  require "arrow/array"
35
36
  require "arrow/array-builder"
37
+ require "arrow/binary-array-builder"
36
38
  require "arrow/chunked-array"
37
39
  require "arrow/column"
38
40
  require "arrow/compression-type"
@@ -43,6 +45,7 @@ module Arrow
43
45
  require "arrow/date32-array-builder"
44
46
  require "arrow/date64-array"
45
47
  require "arrow/date64-array-builder"
48
+ require "arrow/decimal128"
46
49
  require "arrow/decimal128-array-builder"
47
50
  require "arrow/decimal128-data-type"
48
51
  require "arrow/dense-union-data-type"
@@ -51,6 +54,7 @@ module Arrow
51
54
  require "arrow/file-output-stream"
52
55
  require "arrow/list-array-builder"
53
56
  require "arrow/list-data-type"
57
+ require "arrow/null-array-builder"
54
58
  require "arrow/path-extension"
55
59
  require "arrow/record"
56
60
  require "arrow/record-batch"
@@ -79,6 +83,10 @@ module Arrow
79
83
  require "arrow/writable"
80
84
  end
81
85
 
86
+ def require_extension_library
87
+ require "arrow.so"
88
+ end
89
+
82
90
  def load_object_info(info)
83
91
  super
84
92
 
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class NullArrayBuilder
20
+ class << self
21
+ def buildable?(args)
22
+ super and args.collect(&:class) != [Integer]
23
+ end
24
+ end
25
+ end
26
+ end
@@ -65,7 +65,7 @@ module Arrow
65
65
 
66
66
  # @since 0.12.0
67
67
  def append_records(records)
68
- n = n_fields
68
+ n = n_columns
69
69
  columns = n.times.collect do
70
70
  []
71
71
  end
@@ -99,17 +99,16 @@ module Arrow
99
99
  end
100
100
  end
101
101
 
102
+ # @since 0.13.0
103
+ def column_builders
104
+ @column_builders ||= n_columns.times.collect do |i|
105
+ get_column_builder(i)
106
+ end
107
+ end
108
+
102
109
  private
103
110
  def resolve_name(name)
104
111
  @name_to_index[name.to_s]
105
112
  end
106
-
107
- # TODO: Make public with good name. Is column_builders good enough?
108
- # builders? sub_builders?
109
- def column_builders
110
- @column_builders ||= n_fields.times.collect do |i|
111
- get_field(i)
112
- end
113
- end
114
113
  end
115
114
  end