red-arrow 10.0.1 → 11.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8cc21ba05de4956b7dd412963d2d39eb5f5d31566c6891ae4388064553baa97
4
- data.tar.gz: a7c1bfa1d73f3a4ab8403e347902ffd6f754f6964d3054788a79d1022b32520b
3
+ metadata.gz: 23f72b7016d780c208dd8f8cbc627becce73e25754f83a62a4a4d3f4bb60a5e3
4
+ data.tar.gz: 23f57e383e26d322e9fa81efed1e8e5545bd5b1160075f4a0b8553efda98af93
5
5
  SHA512:
6
- metadata.gz: e33a2acb65472b70c99348fec8b472ebeac48bbd3ba3c6aabd91481ed214b07e41fa3c6c3090ecd9ed545ee54cbe2fb09a0a7d730421a816afe6c167b00b6a5a
7
- data.tar.gz: 47ab431103a9bcb0f5b4af955013b40415a582723b55640cd6b82a4b8af5ef775ed974e9f62d235cc81c296c0d7a0438edf805f3ed1ab442cae58501cb73625a
6
+ metadata.gz: 2d157d2d56dbca00a2f4a0eadb831a19a7a7124c3c3c8c7675536e4597f2a1447f047eec48654256eff3148457d09f1d3e375a095cf152051ef6603cd8bf25bf
7
+ data.tar.gz: 958f320a92981a6ed7f84fcab92497ac1038a038de302419e9f57f83e3ca4d02d682ec05a129ac63c9ed6e613725619839b46cc589938a56573e88182a8002d4
@@ -106,10 +106,34 @@ namespace red_arrow {
106
106
  return ULL2NUM(array.Value(i));
107
107
  }
108
108
 
109
- // TODO
110
- // inline VALUE convert(const arrow::HalfFloatArray& array,
111
- // const int64_t i) {
112
- // }
109
+ inline VALUE convert(const arrow::HalfFloatArray& array,
110
+ const int64_t i) {
111
+ const auto value = array.Value(i);
112
+ // | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
113
+ constexpr auto exponent_n_bits = 5;
114
+ static const auto exponent_mask =
115
+ static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
116
+ constexpr auto exponent_bias = 15;
117
+ constexpr auto fraction_n_bits = 10;
118
+ static const auto fraction_mask =
119
+ static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
120
+ static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
121
+ const auto sign = value >> (exponent_n_bits + fraction_n_bits);
122
+ const auto exponent = (value >> fraction_n_bits) & exponent_mask;
123
+ const auto fraction = value & fraction_mask;
124
+ if (exponent == exponent_mask) {
125
+ if (sign == 0) {
126
+ return DBL2NUM(HUGE_VAL);
127
+ } else {
128
+ return DBL2NUM(-HUGE_VAL);
129
+ }
130
+ } else {
131
+ const auto implicit_fraction = (exponent == 0) ? 0 : 1;
132
+ return DBL2NUM(((sign == 0) ? 1 : -1) *
133
+ std::pow(2.0, exponent - exponent_bias) *
134
+ (implicit_fraction + fraction / fraction_denominator));
135
+ }
136
+ }
113
137
 
114
138
  inline VALUE convert(const arrow::FloatArray& array,
115
139
  const int64_t i) {
@@ -320,8 +344,7 @@ namespace red_arrow {
320
344
  VISIT(UInt16)
321
345
  VISIT(UInt32)
322
346
  VISIT(UInt64)
323
- // TODO
324
- // VISIT(HalfFloat)
347
+ VISIT(HalfFloat)
325
348
  VISIT(Float)
326
349
  VISIT(Double)
327
350
  VISIT(Binary)
@@ -427,8 +450,7 @@ namespace red_arrow {
427
450
  VISIT(UInt16)
428
451
  VISIT(UInt32)
429
452
  VISIT(UInt64)
430
- // TODO
431
- // VISIT(HalfFloat)
453
+ VISIT(HalfFloat)
432
454
  VISIT(Float)
433
455
  VISIT(Double)
434
456
  VISIT(Binary)
@@ -530,8 +552,7 @@ namespace red_arrow {
530
552
  VISIT(UInt16)
531
553
  VISIT(UInt32)
532
554
  VISIT(UInt64)
533
- // TODO
534
- // VISIT(HalfFloat)
555
+ VISIT(HalfFloat)
535
556
  VISIT(Float)
536
557
  VISIT(Double)
537
558
  VISIT(Binary)
@@ -634,8 +655,7 @@ namespace red_arrow {
634
655
  VISIT(UInt16)
635
656
  VISIT(UInt32)
636
657
  VISIT(UInt64)
637
- // TODO
638
- // VISIT(HalfFloat)
658
+ VISIT(HalfFloat)
639
659
  VISIT(Float)
640
660
  VISIT(Double)
641
661
  VISIT(Binary)
@@ -761,8 +781,7 @@ namespace red_arrow {
761
781
  VISIT(UInt16)
762
782
  VISIT(UInt32)
763
783
  VISIT(UInt64)
764
- // TODO
765
- // VISIT(HalfFloat)
784
+ VISIT(HalfFloat)
766
785
  VISIT(Float)
767
786
  VISIT(Double)
768
787
  VISIT(Binary)
data/ext/arrow/extconf.rb CHANGED
@@ -38,8 +38,6 @@ checking_for(checking_message("Homebrew")) do
38
38
  end
39
39
  end
40
40
 
41
- $CXXFLAGS += " -std=c++17 "
42
-
43
41
  unless required_pkg_config_package([
44
42
  "arrow",
45
43
  Arrow::Version::MAJOR,
@@ -77,4 +75,18 @@ end
77
75
  add_depend_package_path(name, source_dir, build_dir)
78
76
  end
79
77
 
78
+ case RUBY_PLATFORM
79
+ when /darwin/
80
+ symbols_in_external_bundles = [
81
+ "_rbgerr_gerror2exception",
82
+ "_rbgobj_instance_from_ruby_object",
83
+ ]
84
+ symbols_in_external_bundles.each do |symbol|
85
+ $DLDFLAGS << " -Wl,-U,#{symbol}"
86
+ end
87
+ mmacosx_version_min = "-mmacosx-version-min=10.14"
88
+ $CFLAGS << " #{mmacosx_version_min}"
89
+ $CXXFLAGS << " #{mmacosx_version_min}"
90
+ end
91
+
80
92
  create_makefile("arrow")
@@ -84,8 +84,7 @@ namespace red_arrow {
84
84
  VISIT(UInt16)
85
85
  VISIT(UInt32)
86
86
  VISIT(UInt64)
87
- // TODO
88
- // VISIT(HalfFloat)
87
+ VISIT(HalfFloat)
89
88
  VISIT(Float)
90
89
  VISIT(Double)
91
90
  VISIT(Binary)
data/ext/arrow/values.cpp CHANGED
@@ -65,8 +65,7 @@ namespace red_arrow {
65
65
  VISIT(UInt16)
66
66
  VISIT(UInt32)
67
67
  VISIT(UInt64)
68
- // TODO
69
- // VISIT(HalfFloat)
68
+ VISIT(HalfFloat)
70
69
  VISIT(Float)
71
70
  VISIT(Double)
72
71
  VISIT(Binary)
data/lib/arrow/array.rb CHANGED
@@ -22,6 +22,7 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
25
26
 
26
27
  class << self
27
28
  def new(*args)
@@ -115,6 +116,10 @@ module Arrow
115
116
  self
116
117
  end
117
118
 
119
+ def to_arrow_chunked_array
120
+ ChunkedArray.new([self])
121
+ end
122
+
118
123
  alias_method :value_data_type_raw, :value_data_type
119
124
  def value_data_type
120
125
  @value_data_type ||= value_data_type_raw
@@ -22,6 +22,19 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
26
+
27
+ def to_arrow
28
+ self
29
+ end
30
+
31
+ def to_arrow_array
32
+ combine
33
+ end
34
+
35
+ def to_arrow_chunked_array
36
+ self
37
+ end
25
38
 
26
39
  alias_method :size, :n_rows
27
40
  unless method_defined?(:length)
@@ -30,7 +43,16 @@ module Arrow
30
43
 
31
44
  alias_method :chunks_raw, :chunks
32
45
  def chunks
33
- @chunks ||= chunks_raw
46
+ @chunks ||= chunks_raw.tap do |_chunks|
47
+ _chunks.each do |chunk|
48
+ share_input(chunk)
49
+ end
50
+ end
51
+ end
52
+
53
+ alias_method :get_chunk_raw, :get_chunk
54
+ def get_chunk(i)
55
+ chunks[i]
34
56
  end
35
57
 
36
58
  def null?(i)
@@ -143,5 +143,14 @@ module Arrow
143
143
  find_column(selector)
144
144
  end
145
145
  end
146
+
147
+ # Return column names in this object.
148
+ #
149
+ # @return [::Array<String>] column names.
150
+ #
151
+ # @since 11.0.0
152
+ def column_names
153
+ @column_names ||= columns.collect(&:name)
154
+ end
146
155
  end
147
156
  end
data/lib/arrow/column.rb CHANGED
@@ -27,6 +27,7 @@ module Arrow
27
27
  @index = index
28
28
  @field = @container.schema[@index]
29
29
  @data = @container.get_column_data(@index)
30
+ @container.share_input(@data)
30
31
  end
31
32
 
32
33
  def name
@@ -0,0 +1,32 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class HalfFloatArrayBuilder
20
+ private
21
+ def convert_to_arrow_value(value)
22
+ case value
23
+ when Float
24
+ HalfFloat.new(value).to_uint16
25
+ when HalfFloat
26
+ value.to_uint16
27
+ else
28
+ value
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class HalfFloatArray
20
+ def get_value(i)
21
+ HalfFloat.new(get_raw_value(i)).to_f
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,118 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class HalfFloat
20
+ MAX = 65504
21
+ MIN = -65504
22
+ EXPONENT_N_BITS = 5
23
+ EXPONENT_MASK = (2 ** EXPONENT_N_BITS) - 1
24
+ EXPONENT_BIAS = 15
25
+ FRACTION_N_BITS = 10
26
+ FRACTION_MASK = (2 ** FRACTION_N_BITS) - 1
27
+ FRACTION_DENOMINATOR = 2.0 ** FRACTION_N_BITS
28
+
29
+ attr_reader :sign
30
+ attr_reader :exponent
31
+ attr_reader :fraction
32
+ def initialize(*args)
33
+ n_args = args.size
34
+ case n_args
35
+ when 1
36
+ if args[0].is_a?(Float)
37
+ @sign, @exponent, @fraction = deconstruct_float(args[0])
38
+ else
39
+ @sign, @exponent, @fraction = deconstruct_uint16(args[0])
40
+ end
41
+ when 3
42
+ @sign, @exponent, @fraction = *args
43
+ else
44
+ message = "wrong number of arguments (given #{n_args}, expected 1 or 3)"
45
+ raise ArgumentError, message
46
+ end
47
+ end
48
+
49
+ def to_f
50
+ if @exponent == EXPONENT_MASK
51
+ if @sign.zero?
52
+ Float::INFINITY
53
+ else
54
+ -Float::INFINITY
55
+ end
56
+ else
57
+ if @exponent.zero?
58
+ implicit_fraction = 0
59
+ else
60
+ implicit_fraction = 1
61
+ end
62
+ ((-1) ** @sign) *
63
+ (2 ** (@exponent - EXPONENT_BIAS)) *
64
+ (implicit_fraction + @fraction / FRACTION_DENOMINATOR)
65
+ end
66
+ end
67
+
68
+ def to_uint16
69
+ (@sign << (EXPONENT_N_BITS + FRACTION_N_BITS)) ^
70
+ (@exponent << FRACTION_N_BITS) ^
71
+ @fraction
72
+ end
73
+
74
+ def pack
75
+ [to_uint16].pack("S")
76
+ end
77
+
78
+ private
79
+ def deconstruct_float(float)
80
+ if float > MAX
81
+ float = Float::INFINITY
82
+ elsif float < MIN
83
+ float = -Float::INFINITY
84
+ end
85
+ is_infinite = float.infinite?
86
+ if is_infinite
87
+ sign = (is_infinite == 1) ? 0 : 1
88
+ exponent = EXPONENT_MASK
89
+ fraction = 0
90
+ elsif float.zero?
91
+ sign = 0
92
+ exponent = 0
93
+ fraction = 0
94
+ else
95
+ sign = (float.positive? ? 0 : 1)
96
+ float_abs = float.abs
97
+ 1.upto(EXPONENT_MASK) do |e|
98
+ next_exponent_value = 2 ** (e + 1 - EXPONENT_BIAS)
99
+ next if float_abs > next_exponent_value
100
+ exponent = e
101
+ exponent_value = 2 ** (e - EXPONENT_BIAS)
102
+ fraction =
103
+ ((float_abs / exponent_value - 1) * FRACTION_DENOMINATOR).round
104
+ break
105
+ end
106
+ end
107
+ [sign, exponent, fraction]
108
+ end
109
+
110
+ def deconstruct_uint16(uint16)
111
+ # | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
112
+ sign = (uint16 >> (EXPONENT_N_BITS + FRACTION_N_BITS))
113
+ exponent = ((uint16 >> FRACTION_N_BITS) & EXPONENT_MASK)
114
+ fraction = (uint16 & FRACTION_MASK)
115
+ [sign, exponent, fraction]
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,29 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module InputReferable
20
+ def refer_input(input)
21
+ @input = input
22
+ end
23
+
24
+ def share_input(other)
25
+ return unless defined?(@input)
26
+ other.refer_input(@input)
27
+ end
28
+ end
29
+ end
data/lib/arrow/loader.rb CHANGED
@@ -39,6 +39,7 @@ module Arrow
39
39
  require "arrow/field-containable"
40
40
  require "arrow/generic-filterable"
41
41
  require "arrow/generic-takeable"
42
+ require "arrow/input-referable"
42
43
  require "arrow/record-containable"
43
44
  require "arrow/symbol-values-appendable"
44
45
 
@@ -81,6 +82,9 @@ module Arrow
81
82
  require "arrow/fixed-size-binary-array-builder"
82
83
  require "arrow/function"
83
84
  require "arrow/group"
85
+ require "arrow/half-float"
86
+ require "arrow/half-float-array"
87
+ require "arrow/half-float-array-builder"
84
88
  require "arrow/list-array-builder"
85
89
  require "arrow/list-data-type"
86
90
  require "arrow/map-array"
@@ -196,6 +200,7 @@ module Arrow
196
200
  "Arrow::Date64Array",
197
201
  "Arrow::Decimal128Array",
198
202
  "Arrow::Decimal256Array",
203
+ "Arrow::HalfFloatArray",
199
204
  "Arrow::Time32Array",
200
205
  "Arrow::Time64Array",
201
206
  "Arrow::TimestampArray"
@@ -35,14 +35,16 @@ module Arrow
35
35
  fields = []
36
36
  @values = []
37
37
  @raw_table.each do |name, array|
38
- if array.respond_to?(:to_arrow_array)
39
- array = array.to_arrow_array
38
+ if array.respond_to?(:to_arrow_chunked_array)
39
+ chunked_array = array.to_arrow_chunked_array
40
+ elsif array.respond_to?(:to_arrow_array)
41
+ chunked_array = ChunkedArray.new([array.to_arrow_array])
40
42
  else
41
43
  array = array.to_ary if array.respond_to?(:to_ary)
42
- array = ArrayBuilder.build(array)
44
+ chunked_array = ChunkedArray.new([ArrayBuilder.build(array)])
43
45
  end
44
- fields << Field.new(name.to_s, array.value_data_type)
45
- @values << array
46
+ fields << Field.new(name.to_s, chunked_array.value_data_type)
47
+ @values << chunked_array
46
48
  end
47
49
  @schema = Schema.new(fields)
48
50
  end
@@ -19,9 +19,11 @@ require "arrow/raw-table-converter"
19
19
 
20
20
  module Arrow
21
21
  class RecordBatch
22
+ include Enumerable
23
+
22
24
  include ColumnContainable
25
+ include InputReferable
23
26
  include RecordContainable
24
- include Enumerable
25
27
 
26
28
  class << self
27
29
  def new(*args)
@@ -56,7 +58,9 @@ module Arrow
56
58
  #
57
59
  # @since 0.12.0
58
60
  def to_table
59
- Table.new(schema, [self])
61
+ table = Table.new(schema, [self])
62
+ share_input(table)
63
+ table
60
64
  end
61
65
 
62
66
  def respond_to_missing?(name, include_private)
@@ -161,7 +161,7 @@ module Arrow
161
161
  record_batches << record_batch
162
162
  end
163
163
  table = Table.new(schema, record_batches)
164
- table.instance_variable_set(:@input, input)
164
+ table.refer_input(input)
165
165
  table
166
166
  end
167
167
 
@@ -211,7 +211,7 @@ module Arrow
211
211
  field_indexes = @options[:field_indexes]
212
212
  reader.set_field_indexes(field_indexes) if field_indexes
213
213
  table = reader.read_stripes
214
- table.instance_variable_set(:@input, input)
214
+ table.refer_input(input)
215
215
  table
216
216
  end
217
217
  end
@@ -245,7 +245,7 @@ module Arrow
245
245
  open_input_stream do |input|
246
246
  reader = FeatherFileReader.new(input)
247
247
  table = reader.read
248
- table.instance_variable_set(:@input, input)
248
+ table.refer_input(input)
249
249
  table
250
250
  end
251
251
  end
@@ -254,7 +254,7 @@ module Arrow
254
254
  open_input_stream do |input|
255
255
  reader = JSONReader.new(input)
256
256
  table = reader.read
257
- table.instance_variable_set(:@input, input)
257
+ table.refer_input(input)
258
258
  table
259
259
  end
260
260
  end
@@ -51,6 +51,7 @@ module Arrow
51
51
  raise ArgumentError, message
52
52
  end
53
53
  __send__(custom_save_method)
54
+ @table
54
55
  end
55
56
 
56
57
  private
data/lib/arrow/table.rb CHANGED
@@ -22,6 +22,7 @@ module Arrow
22
22
  include ColumnContainable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
25
26
  include RecordContainable
26
27
 
27
28
  class << self
@@ -188,6 +189,7 @@ module Arrow
188
189
 
189
190
  reader = TableBatchReader.new(self)
190
191
  while record_batch = reader.read_next
192
+ share_input(record_batch)
191
193
  yield(record_batch)
192
194
  end
193
195
  end
@@ -346,10 +348,12 @@ module Arrow
346
348
  end
347
349
  end
348
350
  if sliced_tables.size > 1
349
- sliced_tables[0].concatenate(sliced_tables[1..-1])
351
+ sliced_table = sliced_tables[0].concatenate(sliced_tables[1..-1])
350
352
  else
351
- sliced_tables[0]
353
+ sliced_table = sliced_tables[0]
352
354
  end
355
+ share_input(sliced_table)
356
+ sliced_table
353
357
  end
354
358
 
355
359
  # TODO
@@ -401,7 +405,9 @@ module Arrow
401
405
  new_fields << new_column[:field]
402
406
  new_arrays << new_column[:data]
403
407
  end
404
- self.class.new(new_fields, new_arrays)
408
+ table = self.class.new(new_fields, new_arrays)
409
+ share_input(table)
410
+ table
405
411
  end
406
412
 
407
413
  alias_method :remove_column_raw, :remove_column
@@ -423,7 +429,9 @@ module Arrow
423
429
  raise IndexError.new(message)
424
430
  end
425
431
  end
426
- remove_column_raw(index)
432
+ table = remove_column_raw(index)
433
+ share_input(table)
434
+ table
427
435
  end
428
436
 
429
437
  # Experimental
@@ -445,43 +453,59 @@ module Arrow
445
453
  packed_arrays = columns.collect do |column|
446
454
  column.data.pack
447
455
  end
448
- self.class.new(schema, packed_arrays)
456
+ table = self.class.new(schema, packed_arrays)
457
+ share_input(table)
458
+ table
449
459
  end
450
460
 
451
- # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
452
- # @!macro join_common_before
453
- # @param right [Arrow::Table] The right table.
461
+ # Join another Table by matching with keys.
462
+ #
463
+ # @!macro join_common_before
464
+ # @param right [Arrow::Table] The right table.
465
+ #
466
+ # Join columns with `right` on join key columns.
467
+ #
468
+ # @!macro join_common_after
469
+ # @param type [Arrow::JoinType] How to join.
470
+ # @param left_outputs [::Array<String, Symbol>] Output columns in
471
+ # `self`.
472
+ #
473
+ # If both of `left_outputs` and `right_outputs` aren't
474
+ # specified, all columns in `self` and `right` are
475
+ # outputted.
476
+ # @param right_outputs [::Array<String, Symbol>] Output columns in
477
+ # `right`.
454
478
  #
455
- # Join columns with `right` on join key columns.
479
+ # If both of `left_outputs` and `right_outputs` aren't
480
+ # specified, all columns in `self` and `right` are
481
+ # outputted.
482
+ # @return [Arrow::Table]
483
+ # The joined `Arrow::Table`.
456
484
  #
457
- # @!macro join_common_after
458
- # @param type [Arrow::JoinType] How to join.
459
- # @param left_outputs [::Array<String, Symbol>] Output columns in
460
- # `self`.
485
+ # @overload join(right, type: :inner, left_outputs: nil, right_outputs: nil)
486
+ # If key(s) are not supplied, common keys in self and right are used.
461
487
  #
462
- # If both of `left_outputs` and `right_outputs` aren't
463
- # specified, all columns in `self` and `right` are
464
- # outputted.
465
- # @param right_outputs [::Array<String, Symbol>] Output columns in
466
- # `right`.
488
+ # @macro join_common_before
489
+ # @macro join_common_after
467
490
  #
468
- # If both of `left_outputs` and `right_outputs` aren't
469
- # specified, all columns in `self` and `right` are
470
- # outputted.
471
- # @return [Arrow::Table]
472
- # The joined `Arrow::Table`.
491
+ # @since 11.0.0
492
+ #
493
+ # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
494
+ # Join right by a key.
473
495
  #
474
496
  # @macro join_common_before
475
497
  # @param key [String, Symbol] A join key.
476
498
  # @macro join_common_after
477
499
  #
478
500
  # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
501
+ # Join right by keys.
479
502
  #
480
503
  # @macro join_common_before
481
504
  # @param keys [::Array<String, Symbol>] Join keys.
482
505
  # @macro join_common_after
483
506
  #
484
507
  # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
508
+ # Join right by a key or keys mapped by a hash.
485
509
  #
486
510
  # @macro join_common_before
487
511
  # @param keys [Hash] Specify join keys in `self` and `right` separately.
@@ -492,7 +516,8 @@ module Arrow
492
516
  # @macro join_common_after
493
517
  #
494
518
  # @since 7.0.0
495
- def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
519
+ def join(right, keys=nil, type: :inner, left_outputs: nil, right_outputs: nil)
520
+ keys ||= (column_names & right.column_names)
496
521
  plan = ExecutePlan.new
497
522
  left_node = plan.build_source_node(self)
498
523
  right_node = plan.build_source_node(right)
@@ -523,7 +548,9 @@ module Arrow
523
548
  plan.start
524
549
  plan.wait
525
550
  reader = sink_node_options.get_reader(hash_join_node.output_schema)
526
- reader.read_all
551
+ table = reader.read_all
552
+ share_input(table)
553
+ table
527
554
  end
528
555
 
529
556
  alias_method :to_s_raw, :to_s
data/lib/arrow/tensor.rb CHANGED
@@ -160,5 +160,9 @@ module Arrow
160
160
  nil,
161
161
  0)
162
162
  end
163
+
164
+ def to_arrow_chunked_array
165
+ ChunkedArray.new([to_arrow_array])
166
+ end
163
167
  end
164
168
  end
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "10.0.1"
19
+ VERSION = "11.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
49
  spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
50
- spec.add_runtime_dependency("extpp", ">= 0.0.7")
50
+ spec.add_runtime_dependency("extpp", ">= 0.1.1")
51
51
  spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
52
  spec.add_runtime_dependency("native-package-installer")
53
53
  spec.add_runtime_dependency("pkg-config")
@@ -117,6 +117,16 @@ module RawRecordsBasicArraysTests
117
117
  assert_equal(records, target.raw_records)
118
118
  end
119
119
 
120
+ def test_half_float
121
+ records = [
122
+ [-1.5],
123
+ [nil],
124
+ [1.5],
125
+ ]
126
+ target = build({column: :half_float}, records)
127
+ assert_equal(records, target.raw_records)
128
+ end
129
+
120
130
  def test_float
121
131
  records = [
122
132
  [-1.0],
@@ -54,7 +54,8 @@ class DataTypeTest < Test::Unit::TestCase
54
54
  "abstract type: <:floating_point>: " +
55
55
  "use one of not abstract type: [" +
56
56
  "Arrow::DoubleDataType, " +
57
- "Arrow::FloatDataType]"
57
+ "Arrow::FloatDataType, " +
58
+ "Arrow::HalfFloatDataType]"
58
59
  assert_raise(ArgumentError.new(message)) do
59
60
  Arrow::DataType.resolve(:floating_point)
60
61
  end
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class HalfFloatArrayTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("Float") do
21
+ array = Arrow::HalfFloatArray.new([1.5])
22
+ assert_equal([1.5], array.to_a)
23
+ end
24
+
25
+ test("Integer") do
26
+ one_half = Arrow::HalfFloat.new(1.5)
27
+ array = Arrow::HalfFloatArray.new([one_half.to_uint16])
28
+ assert_equal([one_half.to_f], array.to_a)
29
+ end
30
+
31
+ test("HalfFloat") do
32
+ one_half = Arrow::HalfFloat.new(1.5)
33
+ array = Arrow::HalfFloatArray.new([one_half])
34
+ assert_equal([one_half.to_f], array.to_a)
35
+ end
36
+ end
37
+
38
+ test("#[]") do
39
+ one_half = Arrow::HalfFloat.new(1.5)
40
+ array = Arrow::HalfFloatArray.new([one_half.to_uint16])
41
+ assert_equal(one_half.to_f, array[0])
42
+ end
43
+ end
@@ -0,0 +1,130 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ class HalfFloatTest < Test::Unit::TestCase
19
+ sub_test_case(".new") do
20
+ test("Array") do
21
+ positive_infinity = Arrow::HalfFloat.new(0b1, 0b11111, 0b0000000000)
22
+ assert_equal([0b1, 0b11111, 0b0000000000],
23
+ [
24
+ positive_infinity.sign,
25
+ positive_infinity.exponent,
26
+ positive_infinity.fraction,
27
+ ])
28
+ end
29
+
30
+ test("Integer - 0") do
31
+ zero = Arrow::HalfFloat.new(0)
32
+ assert_equal([0b0, 0b00000, 0b0000000000],
33
+ [
34
+ zero.sign,
35
+ zero.exponent,
36
+ zero.fraction,
37
+ ])
38
+ end
39
+
40
+ test("Integer - +infinity") do
41
+ positive_infinity = Arrow::HalfFloat.new(0x7c00)
42
+ assert_equal([0b0, 0b11111, 0b0000000000],
43
+ [
44
+ positive_infinity.sign,
45
+ positive_infinity.exponent,
46
+ positive_infinity.fraction,
47
+ ])
48
+ end
49
+
50
+ test("Integer - -infinity") do
51
+ negative_infinity = Arrow::HalfFloat.new(0xfc00)
52
+ assert_equal([0b1, 0b11111, 0b0000000000],
53
+ [
54
+ negative_infinity.sign,
55
+ negative_infinity.exponent,
56
+ negative_infinity.fraction,
57
+ ])
58
+ end
59
+
60
+ test("Integer - 1/3") do
61
+ one_thirds = Arrow::HalfFloat.new(0x3555)
62
+ assert_equal([0b0, 0b01101, 0b0101010101],
63
+ [
64
+ one_thirds.sign,
65
+ one_thirds.exponent,
66
+ one_thirds.fraction,
67
+ ])
68
+ end
69
+
70
+ test("Float - 0") do
71
+ zero = Arrow::HalfFloat.new(0.0)
72
+ assert_equal([0b0, 0b00000, 0b0000000000],
73
+ [
74
+ zero.sign,
75
+ zero.exponent,
76
+ zero.fraction,
77
+ ])
78
+ end
79
+
80
+ test("Float - too large") do
81
+ positive_infinity = Arrow::HalfFloat.new(65504.1)
82
+ assert_equal([0b0, 0b11111, 0b0000000000],
83
+ [
84
+ positive_infinity.sign,
85
+ positive_infinity.exponent,
86
+ positive_infinity.fraction,
87
+ ])
88
+ end
89
+
90
+ test("Float - +infinity") do
91
+ positive_infinity = Arrow::HalfFloat.new(Float::INFINITY)
92
+ assert_equal([0b0, 0b11111, 0b0000000000],
93
+ [
94
+ positive_infinity.sign,
95
+ positive_infinity.exponent,
96
+ positive_infinity.fraction,
97
+ ])
98
+ end
99
+
100
+ test("Float - too small") do
101
+ negative_infinity = Arrow::HalfFloat.new(-65504.1)
102
+ assert_equal([0b1, 0b11111, 0b0000000000],
103
+ [
104
+ negative_infinity.sign,
105
+ negative_infinity.exponent,
106
+ negative_infinity.fraction,
107
+ ])
108
+ end
109
+
110
+ test("Float - -infinity") do
111
+ negative_infinity = Arrow::HalfFloat.new(-Float::INFINITY)
112
+ assert_equal([0b1, 0b11111, 0b0000000000],
113
+ [
114
+ negative_infinity.sign,
115
+ negative_infinity.exponent,
116
+ negative_infinity.fraction,
117
+ ])
118
+ end
119
+
120
+ test("Float - 1/3") do
121
+ one_thirds = Arrow::HalfFloat.new((2 ** -2) * (1 + 341 / 1024.0))
122
+ assert_equal([0b0, 0b01101, 0b0101010101],
123
+ [
124
+ one_thirds.sign,
125
+ one_thirds.exponent,
126
+ one_thirds.fraction,
127
+ ])
128
+ end
129
+ end
130
+ end
data/test/test-table.rb CHANGED
@@ -41,8 +41,25 @@ class TableTest < Test::Unit::TestCase
41
41
  end
42
42
 
43
43
  sub_test_case(".new") do
44
+ test("{Symbol: Arrow::Array}") do
45
+ schema = Arrow::Schema.new(numbers: :int64)
46
+ assert_equal(Arrow::Table.new(schema,
47
+ [Arrow::Int64Array.new([1, 2, 3])]),
48
+ Arrow::Table.new(numbers: Arrow::Int64Array.new([1, 2, 3])))
49
+ end
50
+
51
+ test("{Symbol: Arrow::ChunkedArray}") do
52
+ chunked_array = Arrow::ChunkedArray.new([Arrow::Int64Array.new([1, 2, 3])])
53
+ schema = Arrow::Schema.new(numbers: :int64)
54
+ assert_equal(Arrow::Table.new(schema,
55
+ [Arrow::Int64Array.new([1, 2, 3])]),
56
+ Arrow::Table.new(numbers: chunked_array))
57
+ end
58
+
44
59
  test("{Symbol: Arrow::Tensor}") do
45
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
60
+ schema = Arrow::Schema.new(numbers: :uint8)
61
+ assert_equal(Arrow::Table.new(schema,
62
+ [Arrow::UInt8Array.new([1, 2, 3])]),
46
63
  Arrow::Table.new(numbers: Arrow::Tensor.new([1, 2, 3])))
47
64
  end
48
65
 
@@ -51,7 +68,8 @@ class TableTest < Test::Unit::TestCase
51
68
  def array_like.to_ary
52
69
  [1, 2, 3]
53
70
  end
54
- assert_equal(Arrow::Table.new(numbers: Arrow::UInt8Array.new([1, 2, 3])),
71
+ schema = Arrow::Schema.new(numbers: :uint8)
72
+ assert_equal(Arrow::Table.new(schema, [Arrow::UInt8Array.new([1, 2, 3])]),
55
73
  Arrow::Table.new(numbers: array_like))
56
74
  end
57
75
  end
@@ -584,6 +602,18 @@ class TableTest < Test::Unit::TestCase
584
602
  end
585
603
  end
586
604
 
605
+ sub_test_case("#column_names") do
606
+ test("unique") do
607
+ table = Arrow::Table.new(a: [1], b: [2], c: [3])
608
+ assert_equal(%w[a b c], table.column_names)
609
+ end
610
+
611
+ test("duplicated") do
612
+ table = Arrow::Table.new([["a", [1, 2, 3]], ["a", [4, 5, 6]]])
613
+ assert_equal(%w[a a], table.column_names)
614
+ end
615
+ end
616
+
587
617
  sub_test_case("#save and .load") do
588
618
  module SaveLoadFormatTests
589
619
  def test_default
@@ -690,6 +720,11 @@ class TableTest < Test::Unit::TestCase
690
720
  schema: @table.schema))
691
721
  end
692
722
 
723
+ test("csv, return value") do
724
+ output = create_output(".csv")
725
+ assert_equal(@table, @table.save(output))
726
+ end
727
+
693
728
  test("csv.gz") do
694
729
  output = create_output(".csv.gz")
695
730
  @table.save(output)
@@ -830,6 +865,76 @@ chris\t-1
830
865
  end
831
866
  end
832
867
  end
868
+
869
+ sub_test_case("GC") do
870
+ def setup
871
+ table = Arrow::Table.new(integer: [1, 2, 3],
872
+ string: ["a", "b", "c"])
873
+ @buffer = Arrow::ResizableBuffer.new(1024)
874
+ table.save(@buffer, format: :arrow)
875
+ @loaded_table = Arrow::Table.load(@buffer)
876
+ end
877
+
878
+ def test_chunked_array
879
+ chunked_array = @loaded_table[0].data
880
+ assert_equal(@buffer,
881
+ chunked_array.instance_variable_get(:@input).buffer)
882
+ end
883
+
884
+ def test_array
885
+ array = @loaded_table[0].data.chunks[0]
886
+ assert_equal(@buffer,
887
+ array.instance_variable_get(:@input).buffer)
888
+ end
889
+
890
+ def test_record_batch
891
+ record_batch = @loaded_table.each_record_batch.first
892
+ assert_equal(@buffer,
893
+ record_batch.instance_variable_get(:@input).buffer)
894
+ end
895
+
896
+ def test_record_batch_array
897
+ array = @loaded_table.each_record_batch.first[0].data
898
+ assert_equal(@buffer,
899
+ array.instance_variable_get(:@input).buffer)
900
+ end
901
+
902
+ def test_record_batch_table
903
+ table = @loaded_table.each_record_batch.first.to_table
904
+ assert_equal(@buffer,
905
+ table.instance_variable_get(:@input).buffer)
906
+ end
907
+
908
+ def test_slice
909
+ table = @loaded_table.slice(0..-1)
910
+ assert_equal(@buffer,
911
+ table.instance_variable_get(:@input).buffer)
912
+ end
913
+
914
+ def test_merge
915
+ table = @loaded_table.merge({})
916
+ assert_equal(@buffer,
917
+ table.instance_variable_get(:@input).buffer)
918
+ end
919
+
920
+ def test_remove_column
921
+ table = @loaded_table.remove_column(0)
922
+ assert_equal(@buffer,
923
+ table.instance_variable_get(:@input).buffer)
924
+ end
925
+
926
+ def test_pack
927
+ table = @loaded_table.pack
928
+ assert_equal(@buffer,
929
+ table.instance_variable_get(:@input).buffer)
930
+ end
931
+
932
+ def test_join
933
+ table = @loaded_table.join(@loaded_table, :integer)
934
+ assert_equal(@buffer,
935
+ table.instance_variable_get(:@input).buffer)
936
+ end
937
+ end
833
938
  end
834
939
 
835
940
  test("#pack") do
@@ -1026,6 +1131,20 @@ visible: false
1026
1131
  end
1027
1132
 
1028
1133
  sub_test_case("#join") do
1134
+ test("no keys") do
1135
+ table1 = Arrow::Table.new(key: [1, 2, 3],
1136
+ number: [10, 20, 30])
1137
+ table2 = Arrow::Table.new(key: [3, 1],
1138
+ string: ["three", "one"])
1139
+ assert_equal(Arrow::Table.new([
1140
+ ["key", [1, 3]],
1141
+ ["number", [10, 30]],
1142
+ ["key", [1, 3]],
1143
+ ["string", ["one", "three"]],
1144
+ ]),
1145
+ table1.join(table2))
1146
+ end
1147
+
1029
1148
  test("keys: String") do
1030
1149
  table1 = Arrow::Table.new(key: [1, 2, 3],
1031
1150
  number: [10, 20, 30])
@@ -1083,7 +1202,9 @@ visible: false
1083
1202
  ["right_key", [1, 3]],
1084
1203
  ["string", ["one", "three"]],
1085
1204
  ]),
1086
- table1.join(table2, {left: "left_key", right: :right_key}))
1205
+ table1.join(table2,
1206
+ {left: "left_key", right: :right_key},
1207
+ type: :inner))
1087
1208
  end
1088
1209
 
1089
1210
  test("keys: {left: [String, Symbol], right: [Symbol, String]}") do
@@ -1105,7 +1226,8 @@ visible: false
1105
1226
  {
1106
1227
  left: ["left_key1", :left_key2],
1107
1228
  right: [:right_key1, "right_key2"],
1108
- }))
1229
+ },
1230
+ type: :inner))
1109
1231
  end
1110
1232
 
1111
1233
  test("type:") do
@@ -107,6 +107,16 @@ module ValuesBasicArraysTests
107
107
  assert_equal(values, target.values)
108
108
  end
109
109
 
110
+ def test_half_float
111
+ values = [
112
+ -1.5,
113
+ nil,
114
+ 1.5,
115
+ ]
116
+ target = build(Arrow::HalfFloatArray.new(values))
117
+ assert_equal(values, target.values)
118
+ end
119
+
110
120
  def test_float
111
121
  values = [
112
122
  -1.0,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow
3
3
  version: !ruby/object:Gem::Version
4
- version: 10.0.1
4
+ version: 11.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Apache Arrow Developers
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-10 00:00:00.000000000 Z
11
+ date: 2023-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.0.7
33
+ version: 0.1.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 0.0.7
40
+ version: 0.1.1
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: gio2
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -152,6 +152,10 @@ files:
152
152
  - lib/arrow/generic-filterable.rb
153
153
  - lib/arrow/generic-takeable.rb
154
154
  - lib/arrow/group.rb
155
+ - lib/arrow/half-float-array-builder.rb
156
+ - lib/arrow/half-float-array.rb
157
+ - lib/arrow/half-float.rb
158
+ - lib/arrow/input-referable.rb
155
159
  - lib/arrow/list-array-builder.rb
156
160
  - lib/arrow/list-data-type.rb
157
161
  - lib/arrow/loader.rb
@@ -263,6 +267,8 @@ files:
263
267
  - test/test-float-scalar.rb
264
268
  - test/test-function.rb
265
269
  - test/test-group.rb
270
+ - test/test-half-float-array.rb
271
+ - test/test-half-float.rb
266
272
  - test/test-list-array-builder.rb
267
273
  - test/test-list-array.rb
268
274
  - test/test-list-data-type.rb
@@ -308,7 +314,7 @@ homepage: https://arrow.apache.org/
308
314
  licenses:
309
315
  - Apache-2.0
310
316
  metadata:
311
- msys2_mingw_dependencies: arrow>=10.0.1
317
+ msys2_mingw_dependencies: arrow>=11.0.0
312
318
  post_install_message:
313
319
  rdoc_options: []
314
320
  require_paths:
@@ -324,7 +330,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
324
330
  - !ruby/object:Gem::Version
325
331
  version: '0'
326
332
  requirements: []
327
- rubygems_version: 3.3.15
333
+ rubygems_version: 3.5.0.dev
328
334
  signing_key:
329
335
  specification_version: 4
330
336
  summary: Red Arrow is the Ruby bindings of Apache Arrow
@@ -383,6 +389,8 @@ test_files:
383
389
  - test/test-float-scalar.rb
384
390
  - test/test-function.rb
385
391
  - test/test-group.rb
392
+ - test/test-half-float-array.rb
393
+ - test/test-half-float.rb
386
394
  - test/test-list-array-builder.rb
387
395
  - test/test-list-array.rb
388
396
  - test/test-list-data-type.rb