red-arrow 0.17.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 661c88599c8cb32f10fbea1e03994a861bb0a14da3617a0124c614428dc8e016
4
- data.tar.gz: b478f0588467345bdce0da3c8a10453767bb0ce9cf64586bbd38411363f4e423
3
+ metadata.gz: a45e003f0a453f175b1dbfc81c1fcf092cbfe964dd43e44d7b16e2087834ef5d
4
+ data.tar.gz: 5d1dc1d87a821d1ac4c49603d1d92d723230eb1f8a7cdd540dafe3715a53c12e
5
5
  SHA512:
6
- metadata.gz: e2c72551f2211db790cb0627754290a5d1ddf6583d0b14b830ee5922786a5096d1ad0e2b78849f8213b2164a4c2e6803fd6317d65d5a3f36cbf71b3c660bbd51
7
- data.tar.gz: b1271385c4e346627dd58340a369742307fb432ce29a0f2acab501c385c1677c50ed885e3528f5d895bc8a35d5e5a99ddc2ee79357c902cd2236186d6da647d5
6
+ metadata.gz: 85329e05ae20268d44a9ffff5fb3278263e5b77deda6f03fda31595b117252b7970fd17328ce2650c8650895a1813a1fb95799d3331a312bce2c276f3e5d55a4
7
+ data.tar.gz: fe90b0ff0dfb9b9126765818d54f7d4d203e6c72ab53dfb2680a48dc474fef09b8ed3f0553f3c1fe110d500d625ab2300563c3e4169403326c63c6bf27f33a5f
@@ -285,7 +285,8 @@ namespace red_arrow {
285
285
  // VISIT(Interval)
286
286
  VISIT(List)
287
287
  VISIT(Struct)
288
- VISIT(Union)
288
+ VISIT(SparseUnion)
289
+ VISIT(DenseUnion)
289
290
  VISIT(Dictionary)
290
291
  VISIT(Decimal128)
291
292
  // TODO
@@ -339,9 +340,9 @@ namespace red_arrow {
339
340
  index_ = index;
340
341
  result_ = rb_hash_new();
341
342
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
343
+ const auto n = struct_type->num_fields();
343
344
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
345
+ const auto field_type = struct_type->field(i).get();
345
346
  const auto& field_name = field_type->name();
346
347
  auto key_keep = key_;
347
348
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,7 +389,8 @@ namespace red_arrow {
388
389
  // VISIT(Interval)
389
390
  VISIT(List)
390
391
  VISIT(Struct)
391
- VISIT(Union)
392
+ VISIT(SparseUnion)
393
+ VISIT(DenseUnion)
392
394
  VISIT(Dictionary)
393
395
  VISIT(Decimal128)
394
396
  // TODO
@@ -432,10 +434,10 @@ namespace red_arrow {
432
434
  index_ = index;
433
435
  switch (array.mode()) {
434
436
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
437
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
438
  break;
437
439
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
440
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
441
  break;
440
442
  default:
441
443
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,7 +481,8 @@ namespace red_arrow {
479
481
  // VISIT(Interval)
480
482
  VISIT(List)
481
483
  VISIT(Struct)
482
- VISIT(Union)
484
+ VISIT(SparseUnion)
485
+ VISIT(DenseUnion)
483
486
  VISIT(Dictionary)
484
487
  VISIT(Decimal128)
485
488
  // TODO
@@ -501,14 +504,14 @@ namespace red_arrow {
501
504
  result_ = result;
502
505
  }
503
506
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
507
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
508
  arrow::UnionType* type,
506
509
  const char* tag) {
507
510
  const auto type_code = array.raw_type_codes()[index_];
508
511
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
512
+ const auto field_id = type->child_ids()[type_code];
513
+ if (field_id >= 0) {
514
+ return field_id;
512
515
  }
513
516
  }
514
517
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +519,33 @@ namespace red_arrow {
516
519
  return 0;
517
520
  }
518
521
 
519
- void convert_sparse(const arrow::UnionArray& array) {
522
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
523
  const auto type =
521
524
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
525
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
526
+ const auto index = compute_field_index(array, type, tag);
527
+ const auto field = type->field(index).get();
528
+ const auto& field_name = field->name();
526
529
  const auto field_name_keep = field_name_;
527
530
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
531
+ const auto field_array = array.field(index).get();
532
+ check_status(field_array->Accept(this), tag);
530
533
  field_name_ = field_name_keep;
531
534
  }
532
535
 
533
- void convert_dense(const arrow::UnionArray& array) {
536
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
537
  const auto type =
535
538
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
539
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
540
+ const auto index = compute_field_index(array, type, tag);
541
+ const auto field = type->field(index).get();
542
+ const auto& field_name = field->name();
540
543
  const auto field_name_keep = field_name_;
541
544
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
545
+ const auto field_array = array.field(index);
543
546
  const auto index_keep = index_;
544
547
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
548
+ check_status(field_array->Accept(this), tag);
546
549
  index_ = index_keep;
547
550
  field_name_ = field_name_keep;
548
551
  }
@@ -557,30 +560,57 @@ namespace red_arrow {
557
560
  public:
558
561
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
562
  : array_value_converter_(converter),
560
- index_(0),
563
+ value_index_(0),
561
564
  result_(Qnil) {
562
565
  }
563
566
 
564
567
  VALUE convert(const arrow::DictionaryArray& array,
565
568
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
569
+ value_index_ = array.GetValueIndex(index);
570
+ auto dictionary = array.dictionary().get();
571
+ check_status(dictionary->Accept(this),
569
572
  "[raw-records][dictionary-array]");
570
573
  return result_;
571
574
  }
572
575
 
573
- // TODO: Convert to real value.
574
576
  #define VISIT(TYPE) \
575
577
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
578
+ result_ = convert_value(array, value_index_); \
577
579
  return arrow::Status::OK(); \
578
580
  }
579
581
 
582
+ VISIT(Null)
583
+ VISIT(Boolean)
580
584
  VISIT(Int8)
581
585
  VISIT(Int16)
582
586
  VISIT(Int32)
583
587
  VISIT(Int64)
588
+ VISIT(UInt8)
589
+ VISIT(UInt16)
590
+ VISIT(UInt32)
591
+ VISIT(UInt64)
592
+ // TODO
593
+ // VISIT(HalfFloat)
594
+ VISIT(Float)
595
+ VISIT(Double)
596
+ VISIT(Binary)
597
+ VISIT(String)
598
+ VISIT(FixedSizeBinary)
599
+ VISIT(Date32)
600
+ VISIT(Date64)
601
+ VISIT(Time32)
602
+ VISIT(Time64)
603
+ VISIT(Timestamp)
604
+ // TODO
605
+ // VISIT(Interval)
606
+ VISIT(List)
607
+ VISIT(Struct)
608
+ VISIT(SparseUnion)
609
+ VISIT(DenseUnion)
610
+ VISIT(Dictionary)
611
+ VISIT(Decimal128)
612
+ // TODO
613
+ // VISIT(Extension)
584
614
 
585
615
  #undef VISIT
586
616
 
@@ -592,7 +622,7 @@ namespace red_arrow {
592
622
  }
593
623
 
594
624
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
625
+ int64_t value_index_;
596
626
  VALUE result_;
597
627
  };
598
628
 
@@ -100,7 +100,8 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
106
107
  // TODO
@@ -81,7 +81,8 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
87
88
  // TODO
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DictionaryArray
20
+ def get_value(i)
21
+ dictionary[indices[i]]
22
+ end
23
+ end
24
+ end
@@ -41,6 +41,7 @@ module Arrow
41
41
  require "arrow/array"
42
42
  require "arrow/array-builder"
43
43
  require "arrow/bigdecimal-extension"
44
+ require "arrow/buffer"
44
45
  require "arrow/chunked-array"
45
46
  require "arrow/column"
46
47
  require "arrow/compression-type"
@@ -56,6 +57,7 @@ module Arrow
56
57
  require "arrow/decimal128-array-builder"
57
58
  require "arrow/decimal128-data-type"
58
59
  require "arrow/dense-union-data-type"
60
+ require "arrow/dictionary-array"
59
61
  require "arrow/dictionary-data-type"
60
62
  require "arrow/field"
61
63
  require "arrow/file-output-stream"
@@ -69,6 +71,7 @@ module Arrow
69
71
  require "arrow/record-batch"
70
72
  require "arrow/record-batch-builder"
71
73
  require "arrow/record-batch-file-reader"
74
+ require "arrow/record-batch-iterator"
72
75
  require "arrow/record-batch-stream-reader"
73
76
  require "arrow/rolling-window"
74
77
  require "arrow/schema"
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RawTableConverter
20
+ attr_reader :n_rows
21
+ attr_reader :schema
22
+ attr_reader :values
23
+ def initialize(raw_table)
24
+ @raw_table = raw_table
25
+ convert
26
+ end
27
+
28
+ private
29
+ def convert
30
+ if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
31
+ fields = @raw_table.collect(&:field)
32
+ @schema = Schema.new(fields)
33
+ @values = @raw_table.collect(&:data)
34
+ else
35
+ fields = []
36
+ @values = []
37
+ @raw_table.each do |name, array|
38
+ array = ArrayBuilder.build(array) if array.is_a?(::Array)
39
+ fields << Field.new(name.to_s, array.value_data_type)
40
+ @values << array
41
+ end
42
+ @schema = Schema.new(fields)
43
+ end
44
+ @n_rows = @values[0].length
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,22 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchIterator
20
+ alias_method :to_a, :to_list
21
+ end
22
+ end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class RecordBatch
20
22
  include ColumnContainable
@@ -25,13 +27,19 @@ module Arrow
25
27
  def new(*args)
26
28
  n_args = args.size
27
29
  case n_args
30
+ when 1
31
+ raw_table_converter = RawTableConverter.new(args[0])
32
+ n_rows = raw_table_converter.n_rows
33
+ schema = raw_table_converter.schema
34
+ values = raw_table_converter.values
35
+ super(schema, n_rows, values)
28
36
  when 2
29
37
  schema, data = args
30
38
  RecordBatchBuilder.build(schema, data)
31
39
  when 3
32
40
  super
33
41
  else
34
- message = "wrong number of arguments (given #{n_args}, expected 2..3)"
42
+ message = "wrong number of arguments (given #{n_args}, expected 1..3)"
35
43
  raise ArgumentError, message
36
44
  end
37
45
  end
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end