red-arrow 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +49 -4
  3. data/ext/arrow/arrow.cpp +43 -0
  4. data/ext/arrow/extconf.rb +52 -0
  5. data/ext/arrow/record-batch.cpp +756 -0
  6. data/ext/arrow/red-arrow.hpp +60 -0
  7. data/lib/arrow.rb +2 -1
  8. data/lib/arrow/array-builder.rb +4 -0
  9. data/lib/arrow/array.rb +11 -1
  10. data/lib/arrow/bigdecimal-extension.rb +24 -0
  11. data/lib/arrow/binary-array-builder.rb +36 -0
  12. data/lib/arrow/block-closable.rb +5 -1
  13. data/lib/arrow/csv-loader.rb +28 -6
  14. data/lib/arrow/data-type.rb +8 -4
  15. data/lib/arrow/decimal128-array-builder.rb +2 -2
  16. data/lib/arrow/decimal128.rb +42 -0
  17. data/lib/arrow/list-array-builder.rb +1 -1
  18. data/lib/arrow/loader.rb +8 -0
  19. data/lib/arrow/null-array-builder.rb +26 -0
  20. data/lib/arrow/record-batch-builder.rb +8 -9
  21. data/lib/arrow/struct-array-builder.rb +3 -3
  22. data/lib/arrow/struct-array.rb +15 -7
  23. data/lib/arrow/struct.rb +11 -0
  24. data/lib/arrow/table-loader.rb +14 -14
  25. data/lib/arrow/version.rb +1 -1
  26. data/red-arrow.gemspec +8 -4
  27. data/test/raw-records/record-batch/test-basic-arrays.rb +349 -0
  28. data/test/raw-records/record-batch/test-dense-union-array.rb +486 -0
  29. data/test/raw-records/record-batch/test-list-array.rb +498 -0
  30. data/test/raw-records/record-batch/test-multiple-columns.rb +49 -0
  31. data/test/raw-records/record-batch/test-sparse-union-array.rb +474 -0
  32. data/test/raw-records/record-batch/test-struct-array.rb +426 -0
  33. data/test/run-test.rb +25 -2
  34. data/test/test-array.rb +38 -9
  35. data/test/test-bigdecimal.rb +23 -0
  36. data/{dependency-check/Rakefile → test/test-buffer.rb} +15 -20
  37. data/test/test-chunked-array.rb +22 -0
  38. data/test/test-column.rb +24 -0
  39. data/test/test-csv-loader.rb +30 -0
  40. data/test/test-data-type.rb +25 -0
  41. data/test/test-decimal128.rb +64 -0
  42. data/test/test-field.rb +20 -0
  43. data/test/test-group.rb +2 -2
  44. data/test/test-record-batch-builder.rb +9 -0
  45. data/test/test-record-batch.rb +14 -0
  46. data/test/test-schema.rb +14 -0
  47. data/test/test-struct-array.rb +16 -3
  48. data/test/test-table.rb +14 -0
  49. data/test/test-tensor.rb +56 -0
  50. metadata +117 -47
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #pragma once
21
+
22
+ #include <arrow/api.h>
23
+
24
+ #ifdef _WIN32
25
+ # define gmtime_r gmtime_r_ruby_win32
26
+ # define localtime_r localtime_r_ruby_win32
27
+ # include <ruby.h>
28
+ # undef gmtime_r
29
+ # undef localtime_r
30
+ #endif
31
+
32
+ #include <arrow-glib/arrow-glib.hpp>
33
+ #include <rbgobject.h>
34
+
35
+ namespace red_arrow {
36
+ extern VALUE cDate;
37
+
38
+ extern ID id_BigDecimal;
39
+ extern ID id_jd;
40
+ extern ID id_to_datetime;
41
+
42
+ VALUE record_batch_raw_records(VALUE obj);
43
+
44
+ inline VALUE time_unit_to_scale(arrow::TimeUnit::type unit) {
45
+ switch (unit) {
46
+ case arrow::TimeUnit::SECOND:
47
+ return INT2FIX(1);
48
+ case arrow::TimeUnit::MILLI:
49
+ return INT2FIX(1000);
50
+ case arrow::TimeUnit::MICRO:
51
+ return INT2FIX(1000 * 1000);
52
+ case arrow::TimeUnit::NANO:
53
+ // NOTE: INT2FIX works for 1e+9 because: FIXNUM_MAX >= (1<<30) - 1 > 1e+9
54
+ return INT2FIX(1000 * 1000 * 1000);
55
+ default:
56
+ break; // NOT REACHED
57
+ }
58
+ return Qnil;
59
+ }
60
+ }
@@ -15,7 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "gobject-introspection"
18
+ require "extpp/setup"
19
+ require "gio2"
19
20
 
20
21
  require "arrow/version"
21
22
 
@@ -62,6 +62,10 @@ module Arrow
62
62
  Arrow::StringArray.new(values)
63
63
  end
64
64
  end
65
+
66
+ def buildable?(args)
67
+ args.size == method(:build).arity
68
+ end
65
69
  end
66
70
 
67
71
  def build(values)
@@ -24,7 +24,7 @@ module Arrow
24
24
  builder_class_name = "#{name}Builder"
25
25
  if const_defined?(builder_class_name)
26
26
  builder_class = const_get(builder_class_name)
27
- if args.size == builder_class.method(:build).arity
27
+ if builder_class.buildable?(args)
28
28
  builder_class.build(*args)
29
29
  else
30
30
  super
@@ -35,8 +35,18 @@ module Arrow
35
35
  end
36
36
  end
37
37
 
38
+ # @param i [Integer]
39
+ # The index of the value to be gotten.
40
+ #
41
+ # You can specify negative index like for `::Array#[]`.
42
+ #
43
+ # @return [Object, nil]
44
+ # The `i`-th value.
45
+ #
46
+ # `nil` for NULL value or out of range `i`.
38
47
  def [](i)
39
48
  i += length if i < 0
49
+ return nil if i < 0 or i >= length
40
50
  if null?(i)
41
51
  nil
42
52
  else
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ require "bigdecimal"
19
+
20
+ class BigDecimal
21
+ def to_arrow
22
+ Arrow::Decimal128.new(to_s)
23
+ end
24
+ end
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class BinaryArrayBuilder
20
+ def append_values(values, is_valids=nil)
21
+ if is_valids
22
+ is_valids.each_with_index do |is_valid, i|
23
+ if is_valid
24
+ append_value(values[i])
25
+ else
26
+ append_null
27
+ end
28
+ end
29
+ else
30
+ values.each do |value|
31
+ append_value(value)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -24,7 +24,11 @@ module Arrow
24
24
  begin
25
25
  yield(io)
26
26
  ensure
27
- io.close
27
+ if io.respond_to?(:closed?)
28
+ io.close unless io.closed?
29
+ else
30
+ io.close
31
+ end
28
32
  end
29
33
  end
30
34
  end
@@ -104,6 +104,8 @@ module Arrow
104
104
  end
105
105
  when :schema
106
106
  options.add_schema(value)
107
+ when :encoding
108
+ # process encoding on opening input
107
109
  else
108
110
  setter = "#{key}="
109
111
  if options.respond_to?(setter)
@@ -116,7 +118,7 @@ module Arrow
116
118
  options
117
119
  end
118
120
 
119
- def open_input(raw_input)
121
+ def open_decompress_input(raw_input)
120
122
  if @compression
121
123
  codec = Codec.new(@compression)
122
124
  CompressedInputStream.open(codec, raw_input) do |input|
@@ -127,16 +129,36 @@ module Arrow
127
129
  end
128
130
  end
129
131
 
132
+ def open_encoding_convert_stream(raw_input, &block)
133
+ encoding = @options[:encoding]
134
+ if encoding
135
+ converter = Gio::CharsetConverter.new("UTF-8", encoding)
136
+ convert_input_stream =
137
+ Gio::ConverterInputStream.new(raw_input, converter)
138
+ GIOInputStream.open(convert_input_stream, &block)
139
+ else
140
+ yield(raw_input)
141
+ end
142
+ end
143
+
144
+ def wrap_input(raw_input)
145
+ open_decompress_input(raw_input) do |input_|
146
+ open_encoding_convert_stream(input_) do |input__|
147
+ yield(input__)
148
+ end
149
+ end
150
+ end
151
+
130
152
  def load_from_path(path)
131
153
  options = reader_options
132
154
  if options
133
155
  begin
134
- MemoryMappedInputStream.open(path.to_s) do |raw_input|
135
- open_input(raw_input) do |input|
156
+ MemoryMappedInputStream.open(path) do |raw_input|
157
+ wrap_input(raw_input) do |input|
136
158
  return CSVReader.new(input, options).read
137
159
  end
138
160
  end
139
- rescue Arrow::Error::Invalid
161
+ rescue Arrow::Error::Invalid, Gio::Error
140
162
  end
141
163
  end
142
164
 
@@ -151,11 +173,11 @@ module Arrow
151
173
  if options
152
174
  begin
153
175
  BufferInputStream.open(Buffer.new(data)) do |raw_input|
154
- open_input(raw_input) do |input|
176
+ wrap_input(raw_input) do |input|
155
177
  return CSVReader.new(input, options).read
156
178
  end
157
179
  end
158
- rescue Arrow::Error::Invalid
180
+ rescue Arrow::Error::Invalid, Gio::Error
159
181
  end
160
182
  end
161
183
 
@@ -114,14 +114,18 @@ module Arrow
114
114
 
115
115
  private
116
116
  def resolve_class(data_type)
117
- data_type_name = data_type.to_s.capitalize.gsub(/\AUint/, "UInt")
117
+ components = data_type.to_s.split("_").collect(&:capitalize)
118
+ data_type_name = components.join.gsub(/\AUint/, "UInt")
118
119
  data_type_class_name = "#{data_type_name}DataType"
119
120
  unless Arrow.const_defined?(data_type_class_name)
120
121
  available_types = []
121
122
  Arrow.constants.each do |name|
122
- if name.to_s.end_with?("DataType")
123
- available_types << name.to_s.gsub(/DataType\z/, "").downcase.to_sym
124
- end
123
+ name = name.to_s
124
+ next if name == "DataType"
125
+ next unless name.end_with?("DataType")
126
+ name = name.gsub(/DataType\z/, "")
127
+ components = name.scan(/(UInt[0-9]+|[A-Z][a-z\d]+)/).flatten
128
+ available_types << components.collect(&:downcase).join("_").to_sym
125
129
  end
126
130
  message =
127
131
  "unknown type: #{data_type.inspect}: " +
@@ -15,7 +15,7 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- require "bigdecimal"
18
+ require "arrow/bigdecimal-extension"
19
19
 
20
20
  module Arrow
21
21
  class Decimal128ArrayBuilder
@@ -36,7 +36,7 @@ module Arrow
36
36
  when Float
37
37
  value = Decimal128.new(value.to_s)
38
38
  when BigDecimal
39
- value = Decimal128.new(value.to_s)
39
+ value = value.to_arrow
40
40
  end
41
41
  append_value_raw(value)
42
42
  end
@@ -0,0 +1,42 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Decimal128
20
+ alias_method :to_s_raw, :to_s
21
+
22
+ # @overload to_s
23
+ #
24
+ # @return [String]
25
+ # The string representation of the decimal.
26
+ #
27
+ # @overload to_s(scale)
28
+ #
29
+ # @param scale [Integer] The scale of the decimal.
30
+ # @return [String]
31
+ # The string representation of the decimal including the scale.
32
+ #
33
+ # @since 0.13.0
34
+ def to_s(scale=nil)
35
+ if scale
36
+ to_string_scale(scale)
37
+ else
38
+ to_s_raw
39
+ end
40
+ end
41
+ end
42
+ end
@@ -56,7 +56,7 @@ module Arrow
56
56
  when ::Array
57
57
  append_value_raw
58
58
  @value_builder ||= value_builder
59
- @value_builder.append_values(value, nil)
59
+ @value_builder.append(*value)
60
60
  else
61
61
  message = "list value must be nil or Array: #{value.inspect}"
62
62
  raise ArgumentError, message
@@ -28,11 +28,13 @@ module Arrow
28
28
  private
29
29
  def post_load(repository, namespace)
30
30
  require_libraries
31
+ require_extension_library
31
32
  end
32
33
 
33
34
  def require_libraries
34
35
  require "arrow/array"
35
36
  require "arrow/array-builder"
37
+ require "arrow/binary-array-builder"
36
38
  require "arrow/chunked-array"
37
39
  require "arrow/column"
38
40
  require "arrow/compression-type"
@@ -43,6 +45,7 @@ module Arrow
43
45
  require "arrow/date32-array-builder"
44
46
  require "arrow/date64-array"
45
47
  require "arrow/date64-array-builder"
48
+ require "arrow/decimal128"
46
49
  require "arrow/decimal128-array-builder"
47
50
  require "arrow/decimal128-data-type"
48
51
  require "arrow/dense-union-data-type"
@@ -51,6 +54,7 @@ module Arrow
51
54
  require "arrow/file-output-stream"
52
55
  require "arrow/list-array-builder"
53
56
  require "arrow/list-data-type"
57
+ require "arrow/null-array-builder"
54
58
  require "arrow/path-extension"
55
59
  require "arrow/record"
56
60
  require "arrow/record-batch"
@@ -79,6 +83,10 @@ module Arrow
79
83
  require "arrow/writable"
80
84
  end
81
85
 
86
+ def require_extension_library
87
+ require "arrow.so"
88
+ end
89
+
82
90
  def load_object_info(info)
83
91
  super
84
92
 
@@ -0,0 +1,26 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class NullArrayBuilder
20
+ class << self
21
+ def buildable?(args)
22
+ super and args.collect(&:class) != [Integer]
23
+ end
24
+ end
25
+ end
26
+ end
@@ -65,7 +65,7 @@ module Arrow
65
65
 
66
66
  # @since 0.12.0
67
67
  def append_records(records)
68
- n = n_fields
68
+ n = n_columns
69
69
  columns = n.times.collect do
70
70
  []
71
71
  end
@@ -99,17 +99,16 @@ module Arrow
99
99
  end
100
100
  end
101
101
 
102
+ # @since 0.13.0
103
+ def column_builders
104
+ @column_builders ||= n_columns.times.collect do |i|
105
+ get_column_builder(i)
106
+ end
107
+ end
108
+
102
109
  private
103
110
  def resolve_name(name)
104
111
  @name_to_index[name.to_s]
105
112
  end
106
-
107
- # TODO: Make public with good name. Is column_builders good enough?
108
- # builders? sub_builders?
109
- def column_builders
110
- @column_builders ||= n_fields.times.collect do |i|
111
- get_field(i)
112
- end
113
- end
114
113
  end
115
114
  end