red-arrow 10.0.1 → 11.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8cc21ba05de4956b7dd412963d2d39eb5f5d31566c6891ae4388064553baa97
4
- data.tar.gz: a7c1bfa1d73f3a4ab8403e347902ffd6f754f6964d3054788a79d1022b32520b
3
+ metadata.gz: 23f72b7016d780c208dd8f8cbc627becce73e25754f83a62a4a4d3f4bb60a5e3
4
+ data.tar.gz: 23f57e383e26d322e9fa81efed1e8e5545bd5b1160075f4a0b8553efda98af93
5
5
  SHA512:
6
- metadata.gz: e33a2acb65472b70c99348fec8b472ebeac48bbd3ba3c6aabd91481ed214b07e41fa3c6c3090ecd9ed545ee54cbe2fb09a0a7d730421a816afe6c167b00b6a5a
7
- data.tar.gz: 47ab431103a9bcb0f5b4af955013b40415a582723b55640cd6b82a4b8af5ef775ed974e9f62d235cc81c296c0d7a0438edf805f3ed1ab442cae58501cb73625a
6
+ metadata.gz: 2d157d2d56dbca00a2f4a0eadb831a19a7a7124c3c3c8c7675536e4597f2a1447f047eec48654256eff3148457d09f1d3e375a095cf152051ef6603cd8bf25bf
7
+ data.tar.gz: 958f320a92981a6ed7f84fcab92497ac1038a038de302419e9f57f83e3ca4d02d682ec05a129ac63c9ed6e613725619839b46cc589938a56573e88182a8002d4
@@ -106,10 +106,34 @@ namespace red_arrow {
106
106
  return ULL2NUM(array.Value(i));
107
107
  }
108
108
 
109
- // TODO
110
- // inline VALUE convert(const arrow::HalfFloatArray& array,
111
- // const int64_t i) {
112
- // }
109
+ inline VALUE convert(const arrow::HalfFloatArray& array,
110
+ const int64_t i) {
111
+ const auto value = array.Value(i);
112
+ // | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
113
+ constexpr auto exponent_n_bits = 5;
114
+ static const auto exponent_mask =
115
+ static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
116
+ constexpr auto exponent_bias = 15;
117
+ constexpr auto fraction_n_bits = 10;
118
+ static const auto fraction_mask =
119
+ static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
120
+ static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
121
+ const auto sign = value >> (exponent_n_bits + fraction_n_bits);
122
+ const auto exponent = (value >> fraction_n_bits) & exponent_mask;
123
+ const auto fraction = value & fraction_mask;
124
+ if (exponent == exponent_mask) {
125
+ if (sign == 0) {
126
+ return DBL2NUM(HUGE_VAL);
127
+ } else {
128
+ return DBL2NUM(-HUGE_VAL);
129
+ }
130
+ } else {
131
+ const auto implicit_fraction = (exponent == 0) ? 0 : 1;
132
+ return DBL2NUM(((sign == 0) ? 1 : -1) *
133
+ std::pow(2.0, exponent - exponent_bias) *
134
+ (implicit_fraction + fraction / fraction_denominator));
135
+ }
136
+ }
113
137
 
114
138
  inline VALUE convert(const arrow::FloatArray& array,
115
139
  const int64_t i) {
@@ -320,8 +344,7 @@ namespace red_arrow {
320
344
  VISIT(UInt16)
321
345
  VISIT(UInt32)
322
346
  VISIT(UInt64)
323
- // TODO
324
- // VISIT(HalfFloat)
347
+ VISIT(HalfFloat)
325
348
  VISIT(Float)
326
349
  VISIT(Double)
327
350
  VISIT(Binary)
@@ -427,8 +450,7 @@ namespace red_arrow {
427
450
  VISIT(UInt16)
428
451
  VISIT(UInt32)
429
452
  VISIT(UInt64)
430
- // TODO
431
- // VISIT(HalfFloat)
453
+ VISIT(HalfFloat)
432
454
  VISIT(Float)
433
455
  VISIT(Double)
434
456
  VISIT(Binary)
@@ -530,8 +552,7 @@ namespace red_arrow {
530
552
  VISIT(UInt16)
531
553
  VISIT(UInt32)
532
554
  VISIT(UInt64)
533
- // TODO
534
- // VISIT(HalfFloat)
555
+ VISIT(HalfFloat)
535
556
  VISIT(Float)
536
557
  VISIT(Double)
537
558
  VISIT(Binary)
@@ -634,8 +655,7 @@ namespace red_arrow {
634
655
  VISIT(UInt16)
635
656
  VISIT(UInt32)
636
657
  VISIT(UInt64)
637
- // TODO
638
- // VISIT(HalfFloat)
658
+ VISIT(HalfFloat)
639
659
  VISIT(Float)
640
660
  VISIT(Double)
641
661
  VISIT(Binary)
@@ -761,8 +781,7 @@ namespace red_arrow {
761
781
  VISIT(UInt16)
762
782
  VISIT(UInt32)
763
783
  VISIT(UInt64)
764
- // TODO
765
- // VISIT(HalfFloat)
784
+ VISIT(HalfFloat)
766
785
  VISIT(Float)
767
786
  VISIT(Double)
768
787
  VISIT(Binary)
data/ext/arrow/extconf.rb CHANGED
@@ -38,8 +38,6 @@ checking_for(checking_message("Homebrew")) do
38
38
  end
39
39
  end
40
40
 
41
- $CXXFLAGS += " -std=c++17 "
42
-
43
41
  unless required_pkg_config_package([
44
42
  "arrow",
45
43
  Arrow::Version::MAJOR,
@@ -77,4 +75,18 @@ end
77
75
  add_depend_package_path(name, source_dir, build_dir)
78
76
  end
79
77
 
78
+ case RUBY_PLATFORM
79
+ when /darwin/
80
+ symbols_in_external_bundles = [
81
+ "_rbgerr_gerror2exception",
82
+ "_rbgobj_instance_from_ruby_object",
83
+ ]
84
+ symbols_in_external_bundles.each do |symbol|
85
+ $DLDFLAGS << " -Wl,-U,#{symbol}"
86
+ end
87
+ mmacosx_version_min = "-mmacosx-version-min=10.14"
88
+ $CFLAGS << " #{mmacosx_version_min}"
89
+ $CXXFLAGS << " #{mmacosx_version_min}"
90
+ end
91
+
80
92
  create_makefile("arrow")
@@ -84,8 +84,7 @@ namespace red_arrow {
84
84
  VISIT(UInt16)
85
85
  VISIT(UInt32)
86
86
  VISIT(UInt64)
87
- // TODO
88
- // VISIT(HalfFloat)
87
+ VISIT(HalfFloat)
89
88
  VISIT(Float)
90
89
  VISIT(Double)
91
90
  VISIT(Binary)
data/ext/arrow/values.cpp CHANGED
@@ -65,8 +65,7 @@ namespace red_arrow {
65
65
  VISIT(UInt16)
66
66
  VISIT(UInt32)
67
67
  VISIT(UInt64)
68
- // TODO
69
- // VISIT(HalfFloat)
68
+ VISIT(HalfFloat)
70
69
  VISIT(Float)
71
70
  VISIT(Double)
72
71
  VISIT(Binary)
data/lib/arrow/array.rb CHANGED
@@ -22,6 +22,7 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
25
26
 
26
27
  class << self
27
28
  def new(*args)
@@ -115,6 +116,10 @@ module Arrow
115
116
  self
116
117
  end
117
118
 
119
+ def to_arrow_chunked_array
120
+ ChunkedArray.new([self])
121
+ end
122
+
118
123
  alias_method :value_data_type_raw, :value_data_type
119
124
  def value_data_type
120
125
  @value_data_type ||= value_data_type_raw
@@ -22,6 +22,19 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
26
+
27
+ def to_arrow
28
+ self
29
+ end
30
+
31
+ def to_arrow_array
32
+ combine
33
+ end
34
+
35
+ def to_arrow_chunked_array
36
+ self
37
+ end
25
38
 
26
39
  alias_method :size, :n_rows
27
40
  unless method_defined?(:length)
@@ -30,7 +43,16 @@ module Arrow
30
43
 
31
44
  alias_method :chunks_raw, :chunks
32
45
  def chunks
33
- @chunks ||= chunks_raw
46
+ @chunks ||= chunks_raw.tap do |_chunks|
47
+ _chunks.each do |chunk|
48
+ share_input(chunk)
49
+ end
50
+ end
51
+ end
52
+
53
+ alias_method :get_chunk_raw, :get_chunk
54
+ def get_chunk(i)
55
+ chunks[i]
34
56
  end
35
57
 
36
58
  def null?(i)
@@ -143,5 +143,14 @@ module Arrow
143
143
  find_column(selector)
144
144
  end
145
145
  end
146
+
147
+ # Return column names in this object.
148
+ #
149
+ # @return [::Array<String>] column names.
150
+ #
151
+ # @since 11.0.0
152
+ def column_names
153
+ @column_names ||= columns.collect(&:name)
154
+ end
146
155
  end
147
156
  end
data/lib/arrow/column.rb CHANGED
@@ -27,6 +27,7 @@ module Arrow
27
27
  @index = index
28
28
  @field = @container.schema[@index]
29
29
  @data = @container.get_column_data(@index)
30
+ @container.share_input(@data)
30
31
  end
31
32
 
32
33
  def name
@@ -0,0 +1,32 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class HalfFloatArrayBuilder
    private

    # Normalize a value before it is appended to the builder.
    #
    # A Float is wrapped in HalfFloat and reduced to its uint16 form, a
    # HalfFloat is reduced to its uint16 form directly, and anything else
    # is passed through unchanged.
    def convert_to_arrow_value(value)
      return HalfFloat.new(value).to_uint16 if value.is_a?(Float)
      return value.to_uint16 if value.is_a?(HalfFloat)

      value
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class HalfFloatArray
    # Return the i-th element as a Float.
    #
    # The raw value from get_raw_value is wrapped in HalfFloat and
    # converted with HalfFloat#to_f.
    def get_value(i)
      raw_value = get_raw_value(i)
      half_float = HalfFloat.new(raw_value)
      half_float.to_f
    end
  end
end
@@ -0,0 +1,118 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # A half-precision floating point number held as its three bit fields:
  #
  #   | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
  class HalfFloat
    # Largest and smallest finite values; anything beyond them is stored
    # as +/-infinity.
    MAX = 65504
    MIN = -65504
    EXPONENT_N_BITS = 5
    EXPONENT_MASK = (2 ** EXPONENT_N_BITS) - 1
    EXPONENT_BIAS = 15
    FRACTION_N_BITS = 10
    FRACTION_MASK = (2 ** FRACTION_N_BITS) - 1
    FRACTION_DENOMINATOR = 2.0 ** FRACTION_N_BITS

    attr_reader :sign
    attr_reader :exponent
    attr_reader :fraction

    # @overload initialize(float)
    #   @param float [Float] A value to be encoded.
    # @overload initialize(uint16)
    #   @param uint16 [Integer] A raw 16-bit pattern to be split into fields.
    # @overload initialize(sign, exponent, fraction)
    #   @param sign [Integer] The sign bit.
    #   @param exponent [Integer] The biased exponent field.
    #   @param fraction [Integer] The fraction field.
    # @raise [ArgumentError] If the number of arguments is neither 1 nor 3.
    def initialize(*args)
      n_args = args.size
      case n_args
      when 1
        if args[0].is_a?(Float)
          @sign, @exponent, @fraction = deconstruct_float(args[0])
        else
          @sign, @exponent, @fraction = deconstruct_uint16(args[0])
        end
      when 3
        @sign, @exponent, @fraction = *args
      else
        message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
        raise ArgumentError, message
      end
    end

    # @return [Float] The decoded value: NaN, +/-infinity, or a finite
    #   (normal, subnormal or zero) number.
    def to_f
      if @exponent == EXPONENT_MASK
        # All-ones exponent: infinity when the fraction is 0, NaN otherwise.
        return Float::NAN unless @fraction.zero?
        return @sign.zero? ? Float::INFINITY : -Float::INFINITY
      end

      if @exponent.zero?
        # Subnormal: no implicit leading 1 and the scale stays at
        # 2 ** (1 - bias), the same as the smallest normal number.
        implicit_fraction = 0
        unbiased_exponent = 1 - EXPONENT_BIAS
      else
        implicit_fraction = 1
        unbiased_exponent = @exponent - EXPONENT_BIAS
      end
      magnitude = Math.ldexp(implicit_fraction + @fraction / FRACTION_DENOMINATOR,
                             unbiased_exponent)
      @sign.zero? ? magnitude : -magnitude
    end

    # @return [Integer] The three fields packed into one 16-bit pattern.
    def to_uint16
      (@sign << (EXPONENT_N_BITS + FRACTION_N_BITS)) |
        (@exponent << FRACTION_N_BITS) |
        @fraction
    end

    # @return [String] The 16-bit pattern as a native-endian binary string.
    def pack
      [to_uint16].pack("S")
    end

    private
    # Split a Float into [sign, exponent, fraction].
    #
    # Values outside MIN..MAX become infinity, values too small for a
    # normal number become subnormal (or zero), and NaN becomes a NaN
    # pattern instead of raising from Float#round.
    def deconstruct_float(float)
      # NaN: all-ones exponent with the top fraction bit set.
      return [0, EXPONENT_MASK, 1 << (FRACTION_N_BITS - 1)] if float.nan?

      if float > MAX
        float = Float::INFINITY
      elsif float < MIN
        float = -Float::INFINITY
      end
      is_infinite = float.infinite?
      if is_infinite
        sign = (is_infinite == 1) ? 0 : 1
        exponent = EXPONENT_MASK
        fraction = 0
      elsif float.zero?
        sign = 0
        exponent = 0
        fraction = 0
      else
        sign = (float.positive? ? 0 : 1)
        float_abs = float.abs
        # float_abs == mantissa * (2 ** binary_exponent), 0.5 <= mantissa < 1,
        # so the normalized form is (mantissa * 2) * (2 ** (binary_exponent - 1)).
        mantissa, binary_exponent = Math.frexp(float_abs)
        exponent = binary_exponent - 1 + EXPONENT_BIAS
        if exponent <= 0
          # Too small for a normal number: count units of the smallest
          # subnormal, 2 ** -(bias - 1 + fraction bits).
          exponent = 0
          fraction =
            Math.ldexp(float_abs, EXPONENT_BIAS - 1 + FRACTION_N_BITS).round
        else
          fraction = ((mantissa * 2 - 1) * FRACTION_DENOMINATOR).round
        end
        if fraction > FRACTION_MASK
          # Rounding overflowed the fraction field: carry into the exponent.
          # The MAX guard above keeps this from reaching the infinity exponent.
          exponent += 1
          fraction = 0
        end
      end
      [sign, exponent, fraction]
    end

    # Split a raw 16-bit pattern into [sign, exponent, fraction].
    def deconstruct_uint16(uint16)
      # | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
      sign = (uint16 >> (EXPONENT_N_BITS + FRACTION_N_BITS))
      exponent = ((uint16 >> FRACTION_N_BITS) & EXPONENT_MASK)
      fraction = (uint16 & FRACTION_MASK)
      [sign, exponent, fraction]
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mixin for objects that hold on to the input they were read from.
  # NOTE(review): presumably this keeps the backing input reachable while
  # derived objects are alive - confirm against the callers.
  module InputReferable
    # Remember `input` on this object.
    def refer_input(input)
      @input = input
    end

    # Hand this object's input over to `other`.
    #
    # Does nothing and returns nil when no input was ever referred.
    def share_input(other)
      other.refer_input(@input) if instance_variable_defined?(:@input)
    end
  end
end
data/lib/arrow/loader.rb CHANGED
@@ -39,6 +39,7 @@ module Arrow
39
39
  require "arrow/field-containable"
40
40
  require "arrow/generic-filterable"
41
41
  require "arrow/generic-takeable"
42
+ require "arrow/input-referable"
42
43
  require "arrow/record-containable"
43
44
  require "arrow/symbol-values-appendable"
44
45
 
@@ -81,6 +82,9 @@ module Arrow
81
82
  require "arrow/fixed-size-binary-array-builder"
82
83
  require "arrow/function"
83
84
  require "arrow/group"
85
+ require "arrow/half-float"
86
+ require "arrow/half-float-array"
87
+ require "arrow/half-float-array-builder"
84
88
  require "arrow/list-array-builder"
85
89
  require "arrow/list-data-type"
86
90
  require "arrow/map-array"
@@ -196,6 +200,7 @@ module Arrow
196
200
  "Arrow::Date64Array",
197
201
  "Arrow::Decimal128Array",
198
202
  "Arrow::Decimal256Array",
203
+ "Arrow::HalfFloatArray",
199
204
  "Arrow::Time32Array",
200
205
  "Arrow::Time64Array",
201
206
  "Arrow::TimestampArray"
@@ -35,14 +35,16 @@ module Arrow
35
35
  fields = []
36
36
  @values = []
37
37
  @raw_table.each do |name, array|
38
- if array.respond_to?(:to_arrow_array)
39
- array = array.to_arrow_array
38
+ if array.respond_to?(:to_arrow_chunked_array)
39
+ chunked_array = array.to_arrow_chunked_array
40
+ elsif array.respond_to?(:to_arrow_array)
41
+ chunked_array = ChunkedArray.new([array.to_arrow_array])
40
42
  else
41
43
  array = array.to_ary if array.respond_to?(:to_ary)
42
- array = ArrayBuilder.build(array)
44
+ chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
43
45
  end
44
- fields << Field.new(name.to_s, array.value_data_type)
45
- @values << array
46
+ fields << Field.new(name.to_s, chunked_array.value_data_type)
47
+ @values << chunked_array
46
48
  end
47
49
  @schema = Schema.new(fields)
48
50
  end
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
19
19
 
20
20
  module Arrow
21
21
  class RecordBatch
22
+ include Enumerable
23
+
22
24
  include ColumnContainable
25
+ include InputReferable
23
26
  include RecordContainable
24
- include Enumerable
25
27
 
26
28
  class << self
27
29
  def new(*args)
@@ -56,7 +58,9 @@ module Arrow
56
58
  #
57
59
  # @since 0.12.0
58
60
  def to_table
59
- Table.new(schema, [self])
61
+ table = Table.new(schema, [self])
62
+ share_input(table)
63
+ table
60
64
  end
61
65
 
62
66
  def respond_to_missing?(name, include_private)
@@ -161,7 +161,7 @@ module Arrow
161
161
  record_batches << record_batch
162
162
  end
163
163
  table = Table.new(schema, record_batches)
164
- table.instance_variable_set(:@input, input)
164
+ table.refer_input(input)
165
165
  table
166
166
  end
167
167
 
@@ -211,7 +211,7 @@ module Arrow
211
211
  field_indexes = @options[:field_indexes]
212
212
  reader.set_field_indexes(field_indexes) if field_indexes
213
213
  table = reader.read_stripes
214
- table.instance_variable_set(:@input, input)
214
+ table.refer_input(input)
215
215
  table
216
216
  end
217
217
  end
@@ -245,7 +245,7 @@ module Arrow
245
245
  open_input_stream do |input|
246
246
  reader = FeatherFileReader.new(input)
247
247
  table = reader.read
248
- table.instance_variable_set(:@input, input)
248
+ table.refer_input(input)
249
249
  table
250
250
  end
251
251
  end
@@ -254,7 +254,7 @@ module Arrow
254
254
  open_input_stream do |input|
255
255
  reader = JSONReader.new(input)
256
256
  table = reader.read
257
- table.instance_variable_set(:@input, input)
257
+ table.refer_input(input)
258
258
  table
259
259
  end
260
260
  end
@@ -51,6 +51,7 @@ module Arrow
51
51
  raise ArgumentError, message
52
52
  end
53
53
  __send__(custom_save_method)
54
+ @table
54
55
  end
55
56
 
56
57
  private
data/lib/arrow/table.rb CHANGED
@@ -22,6 +22,7 @@ module Arrow
22
22
  include ColumnContainable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
25
26
  include RecordContainable
26
27
 
27
28
  class << self
@@ -188,6 +189,7 @@ module Arrow
188
189
 
189
190
  reader = TableBatchReader.new(self)
190
191
  while record_batch = reader.read_next
192
+ share_input(record_batch)
191
193
  yield(record_batch)
192
194
  end
193
195
  end
@@ -346,10 +348,12 @@ module Arrow
346
348
  end
347
349
  end
348
350
  if sliced_tables.size > 1
349
- sliced_tables[0].concatenate(sliced_tables[1..-1])
351
+ sliced_table = sliced_tables[0].concatenate(sliced_tables[1..-1])
350
352
  else
351
- sliced_tables[0]
353
+ sliced_table = sliced_tables[0]
352
354
  end
355
+ share_input(sliced_table)
356
+ sliced_table
353
357
  end
354
358
 
355
359
  # TODO
@@ -401,7 +405,9 @@ module Arrow
401
405
  new_fields << new_column[:field]
402
406
  new_arrays << new_column[:data]
403
407
  end
404
- self.class.new(new_fields, new_arrays)
408
+ table = self.class.new(new_fields, new_arrays)
409
+ share_input(table)
410
+ table
405
411
  end
406
412
 
407
413
  alias_method :remove_column_raw, :remove_column
@@ -423,7 +429,9 @@ module Arrow
423
429
  raise IndexError.new(message)
424
430
  end
425
431
  end
426
- remove_column_raw(index)
432
+ table = remove_column_raw(index)
433
+ share_input(table)
434
+ table
427
435
  end
428
436
 
429
437
  # Experimental
@@ -445,43 +453,59 @@ module Arrow
445
453
  packed_arrays = columns.collect do |column|
446
454
  column.data.pack
447
455
  end
448
- self.class.new(schema, packed_arrays)
456
+ table = self.class.new(schema, packed_arrays)
457
+ share_input(table)
458
+ table
449
459
  end
450
460
 
451
- # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
452
- # @!macro join_common_before
453
- # @param right [Arrow::Table] The right table.
461
+ # Join another Table by matching with keys.
462
+ #
463
+ # @!macro join_common_before
464
+ # @param right [Arrow::Table] The right table.
465
+ #
466
+ # Join columns with `right` on join key columns.
467
+ #
468
+ # @!macro join_common_after
469
+ # @param type [Arrow::JoinType] How to join.
470
+ # @param left_outputs [::Array<String, Symbol>] Output columns in
471
+ # `self`.
472
+ #
473
+ # If both of `left_outputs` and `right_outputs` aren't
474
+ # specified, all columns in `self` and `right` are
475
+ # outputted.
476
+ # @param right_outputs [::Array<String, Symbol>] Output columns in
477
+ # `right`.
454
478
  #
455
- # Join columns with `right` on join key columns.
479
+ # If both of `left_outputs` and `right_outputs` aren't
480
+ # specified, all columns in `self` and `right` are
481
+ # outputted.
482
+ # @return [Arrow::Table]
483
+ # The joined `Arrow::Table`.
456
484
  #
457
- # @!macro join_common_after
458
- # @param type [Arrow::JoinType] How to join.
459
- # @param left_outputs [::Array<String, Symbol>] Output columns in
460
- # `self`.
485
+ # @overload join(right, type: :inner, left_outputs: nil, right_outputs: nil)
486
+ # If key(s) are not supplied, common keys in self and right are used.
461
487
  #
462
- # If both of `left_outputs` and `right_outputs` aren't
463
- # specified, all columns in `self` and `right` are
464
- # outputted.
465
- # @param right_outputs [::Array<String, Symbol>] Output columns in
466
- # `right`.
488
+ # @macro join_common_before
489
+ # @macro join_common_after
467
490
  #
468
- # If both of `left_outputs` and `right_outputs` aren't
469
- # specified, all columns in `self` and `right` are
470
- # outputted.
471
- # @return [Arrow::Table]
472
- # The joined `Arrow::Table`.
491
+ # @since 11.0.0
492
+ #
493
+ # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
494
+ # Join right by a key.
473
495
  #
474
496
  # @macro join_common_before
475
497
  # @param key [String, Symbol] A join key.
476
498
  # @macro join_common_after
477
499
  #
478
500
  # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
501
+ # Join right by keys.
479
502
  #
480
503
  # @macro join_common_before
481
504
  # @param keys [::Array<String, Symbol>] Join keys.
482
505
  # @macro join_common_after
483
506
  #
484
507
  # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
508
+ # Join right by a key or keys mapped by a hash.
485
509
  #
486
510
  # @macro join_common_before
487
511
  # @param keys [Hash] Specify join keys in `self` and `right` separately.
@@ -492,7 +516,8 @@ module Arrow
492
516
  # @macro join_common_after
493
517
  #
494
518
  # @since 7.0.0
495
- def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
519
+ def join(right, keys=nil, type: :inner, left_outputs: nil, right_outputs: nil)
520
+ keys ||= (column_names & right.column_names)
496
521
  plan = ExecutePlan.new
497
522
  left_node = plan.build_source_node(self)
498
523
  right_node = plan.build_source_node(right)
@@ -523,7 +548,9 @@ module Arrow
523
548
  plan.start
524
549
  plan.wait
525
550
  reader = sink_node_options.get_reader(hash_join_node.output_schema)
526
- reader.read_all
551
+ table = reader.read_all
552
+ share_input(table)
553
+ table
527
554
  end
528
555
 
529
556
  alias_method :to_s_raw, :to_s
data/lib/arrow/tensor.rb CHANGED
@@ -160,5 +160,9 @@ module Arrow
160
160
  nil,
161
161
  0)
162
162
  end
163
+
164
+ def to_arrow_chunked_array
165
+ ChunkedArray.new([to_arrow_array])
166
+ end
163
167
  end
164
168
  end
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "10.0.1"
19
+ VERSION = "11.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
49
  spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
50
- spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
+ spec.add_runtime_dependency("extpp", ">= 0.1.1")
51
51
  spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")
53
53
  spec.add_runtime_dependency("pkg-config")
@@ -117,6 +117,16 @@ module RawRecordsBasicArraysTests
117
117
  assert_equal(records, target.raw_records)
118
118
  end
119
119
 
120
+ def test_half_float
121
+ records = [
122
+ [-1.5],
123
+ [nil],
124
+ [1.5],
125
+ ]
126
+ target = build({column: :half_float}, records)
127
+ assert_equal(records, target.raw_records)
128
+ end
129
+
120
130
  def test_float
121
131
  records = [
122
132
  [-1.0],
@@ -54,7 +54,8 @@ class DataTypeTest < Test::Unit::TestCase
54
54
  "abstract type: <:floating_point>: " +
55
55
  "use one of not abstract type: [" +
56
56
  "Arrow::DoubleDataType, " +
57
- "Arrow::FloatDataType]"
57
+ "Arrow::FloatDataType, " +
58
+ "Arrow::HalfFloatDataType]"
58
59
  assert_raise(ArgumentError.new(message)) do
59
60
  Arrow::DataType.resolve(:floating_point)
60
61
  end
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class HalfFloatArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("Float") do
21
+ array = Arrow::HalfFloatArray.new([1.5])
22
+ assert_equal([1.5], array.to_a)
23
+ end
24
+
25
+ test("Integer") do
26
+ one_half = Arrow::HalfFloat.new(1.5)
27
+ array = Arrow::HalfFloatArray.new([one_half.to_uint16])
28
+ assert_equal([one_half.to_f], array.to_a)
29
+ end
30
+
31
+ test("HalfFloat") do
32
+ one_half = Arrow::HalfFloat.new(1.5)
33
+ array = Arrow::HalfFloatArray.new([one_half])
34
+ assert_equal([one_half.to_f], array.to_a)
35
+ end
36
+ end
37
+
38
+ test("#[]") do
39
+ one_half = Arrow::HalfFloat.new(1.5)
40
+ array = Arrow::HalfFloatArray.new([one_half.to_uint16])
41
+ assert_equal(one_half.to_f, array[0])
42
+ end
43
+ end
@@ -0,0 +1,130 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class HalfFloatTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("Array") do
21
+ positive_infinity = Arrow::HalfFloat.new(0b1, 0b11111, 0b0000000000)
22
+ assert_equal([0b1, 0b11111, 0b0000000000],
23
+ [
24
+ positive_infinity.sign,
25
+ positive_infinity.exponent,
26
+ positive_infinity.fraction,
27
+ ])
28
+ end
29
+
30
+ test("Integer - 0") do
31
+ zero = Arrow::HalfFloat.new(0)
32
+ assert_equal([0b0, 0b00000, 0b0000000000],
33
+ [
34
+ zero.sign,
35
+ zero.exponent,
36
+ zero.fraction,
37
+ ])
38
+ end
39
+
40
+ test("Integer - +infinity") do
41
+ positive_infinity = Arrow::HalfFloat.new(0x7c00)
42
+ assert_equal([0b0, 0b11111, 0b0000000000],
43
+ [
44
+ positive_infinity.sign,
45
+ positive_infinity.exponent,
46
+ positive_infinity.fraction,
47
+ ])
48
+ end
49
+
50
+ test("Integer - -infinity") do
51
+ negative_infinity = Arrow::HalfFloat.new(0xfc00)
52
+ assert_equal([0b1, 0b11111, 0b0000000000],
53
+ [
54
+ negative_infinity.sign,
55
+ negative_infinity.exponent,
56
+ negative_infinity.fraction,
57
+ ])
58
+ end
59
+
60
+ test("Integer - 1/3") do
61
+ one_thirds = Arrow::HalfFloat.new(0x3555)
62
+ assert_equal([0b0, 0b01101, 0b0101010101],
63
+ [
64
+ one_thirds.sign,
65
+ one_thirds.exponent,
66
+ one_thirds.fraction,
67
+ ])
68
+ end
69
+
70
+ test("Float - 0") do
71
+ zero = Arrow::HalfFloat.new(0.0)
72
+ assert_equal([0b0, 0b00000, 0b0000000000],
73
+ [
74
+ zero.sign,
75
+ zero.exponent,
76
+ zero.fraction,
77
+ ])
78
+ end
79
+
80
+ test("Float - too large") do
81
+ positive_infinity = Arrow::HalfFloat.new(65504.1)
82
+ assert_equal([0b0, 0b11111, 0b0000000000],
83
+ [
84
+ positive_infinity.sign,
85
+ positive_infinity.exponent,
86
+ positive_infinity.fraction,
87
+ ])
88
+ end
89
+
90
+ test("Float - +infinity") do
91
+ positive_infinity = Arrow::HalfFloat.new(Float::INFINITY)
92
+ assert_equal([0b0, 0b11111, 0b0000000000],
93
+ [
94
+ positive_infinity.sign,
95
+ positive_infinity.exponent,
96
+ positive_infinity.fraction,
97
+ ])
98
+ end
99
+
100
+ test("Float - too small") do
101
+ negative_infinity = Arrow::HalfFloat.new(-65504.1)
102
+ assert_equal([0b1, 0b11111, 0b0000000000],
103
+ [
104
+ negative_infinity.sign,
105
+ negative_infinity.exponent,
106
+ negative_infinity.fraction,
107
+ ])
108
+ end
109
+
110
+ test("Float - -infinity") do
111
+ negative_infinity = Arrow::HalfFloat.new(-Float::INFINITY)
112
+ assert_equal([0b1, 0b11111, 0b0000000000],
113
+ [
114
+ negative_infinity.sign,
115
+ negative_infinity.exponent,
116
+ negative_infinity.fraction,
117
+ ])
118
+ end
119
+
120
+ test("Float - 1/3") do
121
+ one_thirds = Arrow::HalfFloat.new((2 ** -2) * (1 + 341 / 1024.0))
122
+ assert_equal([0b0, 0b01101, 0b0101010101],
123
+ [
124
+ one_thirds.sign,
125
+ one_thirds.exponent,
126
+ one_thirds.fraction,
127
+ ])
128
+ end
129
+ end
130
+ end
data/test/test-table.rb CHANGED
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
41
41
  end
42
42
 
43
43
  sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Array}") do
45
+ schema = Arrow::Schema.new(numbers: :int64)
46
+ assert_equal(Arrow::Table.new(schema,
47
+ [Arrow::Int64Array.new([1, 2, 3])]),
48
+ Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
+ end
50
+
51
+ test("{Symbol: Arrow::ChunkedArray}") do
52
+ chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
+ schema = Arrow::Schema.new(numbers: :int64)
54
+ assert_equal(Arrow::Table.new(schema,
55
+ [Arrow::Int64Array.new([1, 2, 3])]),
56
+ Arrow::Table.new(numbers: chunked_array))
57
+ end
58
+
44
59
  test("{Symbol: Arrow::Tensor}") do
45
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
60
+ schema = Arrow::Schema.new(numbers: :uint8)
61
+ assert_equal(Arrow::Table.new(schema,
62
+ [Arrow::UInt8Array.new([1, 2, 3])]),
46
63
  Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
64
  end
48
65
 
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
51
68
  def array_like.to_ary
52
69
  [1, 2, 3]
53
70
  end
54
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
71
+ schema = Arrow::Schema.new(numbers: :uint8)
72
+ assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
55
73
  Arrow::Table.new(numbers: array_like))
56
74
  end
57
75
  end
@@ -584,6 +602,18 @@ class TableTest < Test::Unit::TestCase
584
602
  end
585
603
  end
586
604
 
605
+ sub_test_case("#column_names") do
606
+ test("unique") do
607
+ table = Arrow::Table.new(a: [1], b: [2], c: [3])
608
+ assert_equal(%w[a b c], table.column_names)
609
+ end
610
+
611
+ test("duplicated") do
612
+ table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
613
+ assert_equal(%w[a a], table.column_names)
614
+ end
615
+ end
616
+
587
617
  sub_test_case("#save and .load") do
588
618
  module SaveLoadFormatTests
589
619
  def test_default
@@ -690,6 +720,11 @@ class TableTest < Test::Unit::TestCase
690
720
  schema: @table.schema))
691
721
  end
692
722
 
723
+ test("csv, return value") do
724
+ output = create_output(".csv")
725
+ assert_equal(@table, @table.save(output))
726
+ end
727
+
693
728
  test("csv.gz") do
694
729
  output = create_output(".csv.gz")
695
730
  @table.save(output)
@@ -830,6 +865,76 @@ chris\t-1
830
865
  end
831
866
  end
832
867
  end
868
+
869
+ sub_test_case("GC") do
870
+ def setup
871
+ table = Arrow::Table.new(integer: [1, 2, 3],
872
+ string: ["a", "b", "c"])
873
+ @buffer = Arrow::ResizableBuffer.new(1024)
874
+ table.save(@buffer, format: :arrow)
875
+ @loaded_table = Arrow::Table.load(@buffer)
876
+ end
877
+
878
+ def test_chunked_array
879
+ chunked_array = @loaded_table[0].data
880
+ assert_equal(@buffer,
881
+ chunked_array.instance_variable_get(:@input).buffer)
882
+ end
883
+
884
+ def test_array
885
+ array = @loaded_table[0].data.chunks[0]
886
+ assert_equal(@buffer,
887
+ array.instance_variable_get(:@input).buffer)
888
+ end
889
+
890
+ def test_record_batch
891
+ record_batch = @loaded_table.each_record_batch.first
892
+ assert_equal(@buffer,
893
+ record_batch.instance_variable_get(:@input).buffer)
894
+ end
895
+
896
+ def test_record_batch_array
897
+ array = @loaded_table.each_record_batch.first[0].data
898
+ assert_equal(@buffer,
899
+ array.instance_variable_get(:@input).buffer)
900
+ end
901
+
902
+ def test_record_batch_table
903
+ table = @loaded_table.each_record_batch.first.to_table
904
+ assert_equal(@buffer,
905
+ table.instance_variable_get(:@input).buffer)
906
+ end
907
+
908
+ def test_slice
909
+ table = @loaded_table.slice(0..-1)
910
+ assert_equal(@buffer,
911
+ table.instance_variable_get(:@input).buffer)
912
+ end
913
+
914
+ def test_merge
915
+ table = @loaded_table.merge({})
916
+ assert_equal(@buffer,
917
+ table.instance_variable_get(:@input).buffer)
918
+ end
919
+
920
+ def test_remove_column
921
+ table = @loaded_table.remove_column(0)
922
+ assert_equal(@buffer,
923
+ table.instance_variable_get(:@input).buffer)
924
+ end
925
+
926
+ def test_pack
927
+ table = @loaded_table.pack
928
+ assert_equal(@buffer,
929
+ table.instance_variable_get(:@input).buffer)
930
+ end
931
+
932
+ def test_join
933
+ table = @loaded_table.join(@loaded_table, :integer)
934
+ assert_equal(@buffer,
935
+ table.instance_variable_get(:@input).buffer)
936
+ end
937
+ end
833
938
  end
834
939
 
835
940
  test("#pack") do
@@ -1026,6 +1131,20 @@ visible: false
1026
1131
  end
1027
1132
 
1028
1133
  sub_test_case("#join") do
1134
+ test("no keys") do
1135
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1136
+ number: [10, 20, 30])
1137
+ table2 = Arrow::Table.new(key: [3, 1],
1138
+ string: ["three", "one"])
1139
+ assert_equal(Arrow::Table.new([
1140
+ ["key", [1, 3]],
1141
+ ["number", [10, 30]],
1142
+ ["key", [1, 3]],
1143
+ ["string", ["one", "three"]],
1144
+ ]),
1145
+ table1.join(table2))
1146
+ end
1147
+
1029
1148
  test("keys: String") do
1030
1149
  table1 = Arrow::Table.new(key: [1, 2, 3],
1031
1150
  number: [10, 20, 30])
@@ -1083,7 +1202,9 @@ visible: false
1083
1202
  ["right_key", [1, 3]],
1084
1203
  ["string", ["one", "three"]],
1085
1204
  ]),
1086
- table1.join(table2, {left: "left_key", right: :right_key}))
1205
+ table1.join(table2,
1206
+ {left: "left_key", right: :right_key},
1207
+ type: :inner))
1087
1208
  end
1088
1209
 
1089
1210
  test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
@@ -1105,7 +1226,8 @@ visible: false
1105
1226
  {
1106
1227
  left: ["left_key1", :left_key2],
1107
1228
  right: [:right_key1, "right_key2"],
1108
- }))
1229
+ },
1230
+ type: :inner))
1109
1231
  end
1110
1232
 
1111
1233
  test("type:") do
@@ -107,6 +107,16 @@ module ValuesBasicArraysTests
107
107
  assert_equal(values, target.values)
108
108
  end
109
109
 
110
+ def test_half_float
111
+ values = [
112
+ -1.5,
113
+ nil,
114
+ 1.5,
115
+ ]
116
+ target = build(Arrow::HalfFloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
110
120
  def test_float
111
121
  values = [
112
122
  -1.0,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.0.1
4
+ version: 11.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.0.7
33
+ version: 0.1.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 0.0.7
40
+ version: 0.1.1
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: gio2
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -152,6 +152,10 @@ files:
152
152
  - lib/arrow/generic-filterable.rb
153
153
  - lib/arrow/generic-takeable.rb
154
154
  - lib/arrow/group.rb
155
+ - lib/arrow/half-float-array-builder.rb
156
+ - lib/arrow/half-float-array.rb
157
+ - lib/arrow/half-float.rb
158
+ - lib/arrow/input-referable.rb
155
159
  - lib/arrow/list-array-builder.rb
156
160
  - lib/arrow/list-data-type.rb
157
161
  - lib/arrow/loader.rb
@@ -263,6 +267,8 @@ files:
263
267
  - test/test-float-scalar.rb
264
268
  - test/test-function.rb
265
269
  - test/test-group.rb
270
+ - test/test-half-float-array.rb
271
+ - test/test-half-float.rb
266
272
  - test/test-list-array-builder.rb
267
273
  - test/test-list-array.rb
268
274
  - test/test-list-data-type.rb
@@ -308,7 +314,7 @@ homepage: https://arrow.apache.org/
308
314
  licenses:
309
315
  - Apache-2.0
310
316
  metadata:
311
- msys2_mingw_dependencies: arrow>=10.0.1
317
+ msys2_mingw_dependencies: arrow>=11.0.0
312
318
  post_install_message:
313
319
  rdoc_options: []
314
320
  require_paths:
@@ -324,7 +330,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
324
330
  - !ruby/object:Gem::Version
325
331
  version: '0'
326
332
  requirements: []
327
- rubygems_version: 3.3.15
333
+ rubygems_version: 3.5.0.dev
328
334
  signing_key:
329
335
  specification_version: 4
330
336
  summary: Red Arrow is the Ruby bindings of Apache Arrow
@@ -383,6 +389,8 @@ test_files:
383
389
  - test/test-float-scalar.rb
384
390
  - test/test-function.rb
385
391
  - test/test-group.rb
392
+ - test/test-half-float-array.rb
393
+ - test/test-half-float.rb
386
394
  - test/test-list-array-builder.rb
387
395
  - test/test-list-array.rb
388
396
  - test/test-list-data-type.rb