red-arrow 0.17.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 661c88599c8cb32f10fbea1e03994a861bb0a14da3617a0124c614428dc8e016
4
- data.tar.gz: b478f0588467345bdce0da3c8a10453767bb0ce9cf64586bbd38411363f4e423
3
+ metadata.gz: a45e003f0a453f175b1dbfc81c1fcf092cbfe964dd43e44d7b16e2087834ef5d
4
+ data.tar.gz: 5d1dc1d87a821d1ac4c49603d1d92d723230eb1f8a7cdd540dafe3715a53c12e
5
5
  SHA512:
6
- metadata.gz: e2c72551f2211db790cb0627754290a5d1ddf6583d0b14b830ee5922786a5096d1ad0e2b78849f8213b2164a4c2e6803fd6317d65d5a3f36cbf71b3c660bbd51
7
- data.tar.gz: b1271385c4e346627dd58340a369742307fb432ce29a0f2acab501c385c1677c50ed885e3528f5d895bc8a35d5e5a99ddc2ee79357c902cd2236186d6da647d5
6
+ metadata.gz: 85329e05ae20268d44a9ffff5fb3278263e5b77deda6f03fda31595b117252b7970fd17328ce2650c8650895a1813a1fb95799d3331a312bce2c276f3e5d55a4
7
+ data.tar.gz: fe90b0ff0dfb9b9126765818d54f7d4d203e6c72ab53dfb2680a48dc474fef09b8ed3f0553f3c1fe110d500d625ab2300563c3e4169403326c63c6bf27f33a5f
@@ -285,7 +285,8 @@ namespace red_arrow {
285
285
  // VISIT(Interval)
286
286
  VISIT(List)
287
287
  VISIT(Struct)
288
- VISIT(Union)
288
+ VISIT(SparseUnion)
289
+ VISIT(DenseUnion)
289
290
  VISIT(Dictionary)
290
291
  VISIT(Decimal128)
291
292
  // TODO
@@ -339,9 +340,9 @@ namespace red_arrow {
339
340
  index_ = index;
340
341
  result_ = rb_hash_new();
341
342
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
343
+ const auto n = struct_type->num_fields();
343
344
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
345
+ const auto field_type = struct_type->field(i).get();
345
346
  const auto& field_name = field_type->name();
346
347
  auto key_keep = key_;
347
348
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,7 +389,8 @@ namespace red_arrow {
388
389
  // VISIT(Interval)
389
390
  VISIT(List)
390
391
  VISIT(Struct)
391
- VISIT(Union)
392
+ VISIT(SparseUnion)
393
+ VISIT(DenseUnion)
392
394
  VISIT(Dictionary)
393
395
  VISIT(Decimal128)
394
396
  // TODO
@@ -432,10 +434,10 @@ namespace red_arrow {
432
434
  index_ = index;
433
435
  switch (array.mode()) {
434
436
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
437
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
438
  break;
437
439
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
440
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
441
  break;
440
442
  default:
441
443
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,7 +481,8 @@ namespace red_arrow {
479
481
  // VISIT(Interval)
480
482
  VISIT(List)
481
483
  VISIT(Struct)
482
- VISIT(Union)
484
+ VISIT(SparseUnion)
485
+ VISIT(DenseUnion)
483
486
  VISIT(Dictionary)
484
487
  VISIT(Decimal128)
485
488
  // TODO
@@ -501,14 +504,14 @@ namespace red_arrow {
501
504
  result_ = result;
502
505
  }
503
506
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
507
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
508
  arrow::UnionType* type,
506
509
  const char* tag) {
507
510
  const auto type_code = array.raw_type_codes()[index_];
508
511
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
512
+ const auto field_id = type->child_ids()[type_code];
513
+ if (field_id >= 0) {
514
+ return field_id;
512
515
  }
513
516
  }
514
517
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +519,33 @@ namespace red_arrow {
516
519
  return 0;
517
520
  }
518
521
 
519
- void convert_sparse(const arrow::UnionArray& array) {
522
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
523
  const auto type =
521
524
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
525
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
526
+ const auto index = compute_field_index(array, type, tag);
527
+ const auto field = type->field(index).get();
528
+ const auto& field_name = field->name();
526
529
  const auto field_name_keep = field_name_;
527
530
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
531
+ const auto field_array = array.field(index).get();
532
+ check_status(field_array->Accept(this), tag);
530
533
  field_name_ = field_name_keep;
531
534
  }
532
535
 
533
- void convert_dense(const arrow::UnionArray& array) {
536
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
537
  const auto type =
535
538
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
539
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
540
+ const auto index = compute_field_index(array, type, tag);
541
+ const auto field = type->field(index).get();
542
+ const auto& field_name = field->name();
540
543
  const auto field_name_keep = field_name_;
541
544
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
545
+ const auto field_array = array.field(index);
543
546
  const auto index_keep = index_;
544
547
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
548
+ check_status(field_array->Accept(this), tag);
546
549
  index_ = index_keep;
547
550
  field_name_ = field_name_keep;
548
551
  }
@@ -557,30 +560,57 @@ namespace red_arrow {
557
560
  public:
558
561
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
562
  : array_value_converter_(converter),
560
- index_(0),
563
+ value_index_(0),
561
564
  result_(Qnil) {
562
565
  }
563
566
 
564
567
  VALUE convert(const arrow::DictionaryArray& array,
565
568
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
569
+ value_index_ = array.GetValueIndex(index);
570
+ auto dictionary = array.dictionary().get();
571
+ check_status(dictionary->Accept(this),
569
572
  "[raw-records][dictionary-array]");
570
573
  return result_;
571
574
  }
572
575
 
573
- // TODO: Convert to real value.
574
576
  #define VISIT(TYPE) \
575
577
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
578
+ result_ = convert_value(array, value_index_); \
577
579
  return arrow::Status::OK(); \
578
580
  }
579
581
 
582
+ VISIT(Null)
583
+ VISIT(Boolean)
580
584
  VISIT(Int8)
581
585
  VISIT(Int16)
582
586
  VISIT(Int32)
583
587
  VISIT(Int64)
588
+ VISIT(UInt8)
589
+ VISIT(UInt16)
590
+ VISIT(UInt32)
591
+ VISIT(UInt64)
592
+ // TODO
593
+ // VISIT(HalfFloat)
594
+ VISIT(Float)
595
+ VISIT(Double)
596
+ VISIT(Binary)
597
+ VISIT(String)
598
+ VISIT(FixedSizeBinary)
599
+ VISIT(Date32)
600
+ VISIT(Date64)
601
+ VISIT(Time32)
602
+ VISIT(Time64)
603
+ VISIT(Timestamp)
604
+ // TODO
605
+ // VISIT(Interval)
606
+ VISIT(List)
607
+ VISIT(Struct)
608
+ VISIT(SparseUnion)
609
+ VISIT(DenseUnion)
610
+ VISIT(Dictionary)
611
+ VISIT(Decimal128)
612
+ // TODO
613
+ // VISIT(Extension)
584
614
 
585
615
  #undef VISIT
586
616
 
@@ -592,7 +622,7 @@ namespace red_arrow {
592
622
  }
593
623
 
594
624
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
625
+ int64_t value_index_;
596
626
  VALUE result_;
597
627
  };
598
628
 
@@ -100,7 +100,8 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
106
107
  // TODO
@@ -81,7 +81,8 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
87
88
  // TODO
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class DictionaryArray
20
+ def get_value(i)
21
+ dictionary[indices[i]]
22
+ end
23
+ end
24
+ end
@@ -41,6 +41,7 @@ module Arrow
41
41
  require "arrow/array"
42
42
  require "arrow/array-builder"
43
43
  require "arrow/bigdecimal-extension"
44
+ require "arrow/buffer"
44
45
  require "arrow/chunked-array"
45
46
  require "arrow/column"
46
47
  require "arrow/compression-type"
@@ -56,6 +57,7 @@ module Arrow
56
57
  require "arrow/decimal128-array-builder"
57
58
  require "arrow/decimal128-data-type"
58
59
  require "arrow/dense-union-data-type"
60
+ require "arrow/dictionary-array"
59
61
  require "arrow/dictionary-data-type"
60
62
  require "arrow/field"
61
63
  require "arrow/file-output-stream"
@@ -69,6 +71,7 @@ module Arrow
69
71
  require "arrow/record-batch"
70
72
  require "arrow/record-batch-builder"
71
73
  require "arrow/record-batch-file-reader"
74
+ require "arrow/record-batch-iterator"
72
75
  require "arrow/record-batch-stream-reader"
73
76
  require "arrow/rolling-window"
74
77
  require "arrow/schema"
@@ -0,0 +1,47 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RawTableConverter
20
+ attr_reader :n_rows
21
+ attr_reader :schema
22
+ attr_reader :values
23
+ def initialize(raw_table)
24
+ @raw_table = raw_table
25
+ convert
26
+ end
27
+
28
+ private
29
+ def convert
30
+ if @raw_table.is_a?(::Array) and @raw_table[0].is_a?(Column)
31
+ fields = @raw_table.collect(&:field)
32
+ @schema = Schema.new(fields)
33
+ @values = @raw_table.collect(&:data)
34
+ else
35
+ fields = []
36
+ @values = []
37
+ @raw_table.each do |name, array|
38
+ array = ArrayBuilder.build(array) if array.is_a?(::Array)
39
+ fields << Field.new(name.to_s, array.value_data_type)
40
+ @values << array
41
+ end
42
+ @schema = Schema.new(fields)
43
+ end
44
+ @n_rows = @values[0].length
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,22 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class RecordBatchIterator
20
+ alias_method :to_a, :to_list
21
+ end
22
+ end
@@ -15,6 +15,8 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ require "arrow/raw-table-converter"
19
+
18
20
  module Arrow
19
21
  class RecordBatch
20
22
  include ColumnContainable
@@ -25,13 +27,19 @@ module Arrow
25
27
  def new(*args)
26
28
  n_args = args.size
27
29
  case n_args
30
+ when 1
31
+ raw_table_converter = RawTableConverter.new(args[0])
32
+ n_rows = raw_table_converter.n_rows
33
+ schema = raw_table_converter.schema
34
+ values = raw_table_converter.values
35
+ super(schema, n_rows, values)
28
36
  when 2
29
37
  schema, data = args
30
38
  RecordBatchBuilder.build(schema, data)
31
39
  when 3
32
40
  super
33
41
  else
34
- message = "wrong number of arguments (given #{n_args}, expected 2..3)"
42
+ message = "wrong number of arguments (given #{n_args}, expected 1..3)"
35
43
  raise ArgumentError, message
36
44
  end
37
45
  end
@@ -32,7 +32,7 @@ module Arrow
32
32
  case index_or_name
33
33
  when String, Symbol
34
34
  name = index_or_name
35
- (@name_to_builder ||= build_name_to_builder)[name.to_s]
35
+ cached_name_to_builder[name.to_s]
36
36
  else
37
37
  index = index_or_name
38
38
  cached_field_builders[index]
@@ -70,13 +70,18 @@ module Arrow
70
70
  append_null
71
71
  when ::Array
72
72
  append_value_raw
73
- value.each_with_index do |sub_value, i|
74
- self[i].append(sub_value)
73
+ cached_field_builders.zip(value) do |builder, sub_value|
74
+ builder.append(sub_value)
75
75
  end
76
76
  when Hash
77
77
  append_value_raw
78
+ local_name_to_builder = cached_name_to_builder.dup
78
79
  value.each do |name, sub_value|
79
- self[name].append(sub_value)
80
+ builder = local_name_to_builder.delete(name.to_s)
81
+ builder.append(sub_value)
82
+ end
83
+ local_name_to_builder.each do |_, builder|
84
+ builder.append_null
80
85
  end
81
86
  else
82
87
  message =
@@ -108,9 +113,6 @@ module Arrow
108
113
  alias_method :append_null_raw, :append_null
109
114
  def append_null
110
115
  append_null_raw
111
- cached_field_builders.each do |builder|
112
- builder.append_null
113
- end
114
116
  end
115
117
 
116
118
  # @since 0.12.0
@@ -136,5 +138,9 @@ module Arrow
136
138
  end
137
139
  name_to_builder
138
140
  end
141
+
142
+ def cached_name_to_builder
143
+ @name_to_builder ||= build_name_to_builder
144
+ end
139
145
  end
140
146
  end