red-arrow 5.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/datum.rb +2 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +13 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +32 -38
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-expression.rb +40 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +107 -76
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: c553e573f87398da7d492e8339684bb86b73d1fff02b56cff3277bb87d96a4cd
         | 
| 4 | 
            +
              data.tar.gz: 9b97be2878369da0bed9ac8621330926b8a95144d833de6798858a210f88c92a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 0bc3526645457a551e25621ba3efaa52a3ad7398e8216c34440651beab58886ec8b52804b7afe227f953b727871980a2ac9a9b1153776499b48cd790ae476304
         | 
| 7 | 
            +
              data.tar.gz: 9f3032d5a54a9b0b911edd273916ce68f140c50753d13cb88b950166ea771138f432d738a3a694f44bfff985fcf8329457e97712dca10a7f8a8430d3d8cb82f6
         | 
    
        data/README.md
    CHANGED
    
    | @@ -50,3 +50,26 @@ table = Arrow::Table.load("/dev/shm/data.arrow") | |
| 50 50 | 
             
            # Process data in table
         | 
| 51 51 | 
             
            table.save("/dev/shm/data-processed.arrow")
         | 
| 52 52 | 
             
            ```
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            ## Development
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            Note that you need to install Apache Arrow C++/GLib at master before preparing Red Arrow. See also:
         | 
| 57 | 
            +
             | 
| 58 | 
            +
              * For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
         | 
| 59 | 
            +
              * For Apache Arrow GLib: https://github.com/apache/arrow/blob/master/c_glib/README.md
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            ```console
         | 
| 62 | 
            +
            $ cd ruby/red-arrow
         | 
| 63 | 
            +
            $ bundle install
         | 
| 64 | 
            +
            $ bundle exec rake test
         | 
| 65 | 
            +
            ```
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            ### For macOS with Homebrew
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            ```console
         | 
| 70 | 
            +
            $ cd ruby/red-arrow
         | 
| 71 | 
            +
            $ bundle install
         | 
| 72 | 
            +
            $ brew install apache-arrow --head
         | 
| 73 | 
            +
            $ brew install apache-arrow-glib --head
         | 
| 74 | 
            +
            $ bundle exec rake test
         | 
| 75 | 
            +
            ```
         | 
    
        data/ext/arrow/converters.cpp
    CHANGED
    
    | @@ -30,6 +30,11 @@ namespace red_arrow { | |
| 30 30 | 
             
                return struct_array_value_converter_->convert(array, i);
         | 
| 31 31 | 
             
              }
         | 
| 32 32 |  | 
| 33 | 
            +
              VALUE ArrayValueConverter::convert(const arrow::MapArray& array,
         | 
| 34 | 
            +
                                                 const int64_t i) {
         | 
| 35 | 
            +
                return map_array_value_converter_->convert(array, i);
         | 
| 36 | 
            +
              }
         | 
| 37 | 
            +
             | 
| 33 38 | 
             
              VALUE ArrayValueConverter::convert(const arrow::UnionArray& array,
         | 
| 34 39 | 
             
                                                 const int64_t i) {
         | 
| 35 40 | 
             
                return union_array_value_converter_->convert(array, i);
         | 
    
        data/ext/arrow/converters.hpp
    CHANGED
    
    | @@ -29,6 +29,7 @@ | |
| 29 29 | 
             
            namespace red_arrow {
         | 
| 30 30 | 
             
              class ListArrayValueConverter;
         | 
| 31 31 | 
             
              class StructArrayValueConverter;
         | 
| 32 | 
            +
              class MapArrayValueConverter;
         | 
| 32 33 | 
             
              class UnionArrayValueConverter;
         | 
| 33 34 | 
             
              class DictionaryArrayValueConverter;
         | 
| 34 35 |  | 
| @@ -38,16 +39,19 @@ namespace red_arrow { | |
| 38 39 | 
             
                  : decimal_buffer_(),
         | 
| 39 40 | 
             
                    list_array_value_converter_(nullptr),
         | 
| 40 41 | 
             
                    struct_array_value_converter_(nullptr),
         | 
| 42 | 
            +
                    map_array_value_converter_(nullptr),
         | 
| 41 43 | 
             
                    union_array_value_converter_(nullptr),
         | 
| 42 44 | 
             
                    dictionary_array_value_converter_(nullptr) {
         | 
| 43 45 | 
             
                }
         | 
| 44 46 |  | 
| 45 47 | 
             
                inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
         | 
| 46 48 | 
             
                                                     StructArrayValueConverter* struct_array_value_converter,
         | 
| 49 | 
            +
                                                     MapArrayValueConverter* map_array_value_converter,
         | 
| 47 50 | 
             
                                                     UnionArrayValueConverter* union_array_value_converter,
         | 
| 48 51 | 
             
                                                     DictionaryArrayValueConverter* dictionary_array_value_converter) {
         | 
| 49 52 | 
             
                  list_array_value_converter_ = list_array_value_converter;
         | 
| 50 53 | 
             
                  struct_array_value_converter_ = struct_array_value_converter;
         | 
| 54 | 
            +
                  map_array_value_converter_ = map_array_value_converter;
         | 
| 51 55 | 
             
                  union_array_value_converter_ = union_array_value_converter;
         | 
| 52 56 | 
             
                  dictionary_array_value_converter_ = dictionary_array_value_converter;
         | 
| 53 57 | 
             
                }
         | 
| @@ -204,6 +208,9 @@ namespace red_arrow { | |
| 204 208 | 
             
                VALUE convert(const arrow::StructArray& array,
         | 
| 205 209 | 
             
                              const int64_t i);
         | 
| 206 210 |  | 
| 211 | 
            +
                VALUE convert(const arrow::MapArray& array,
         | 
| 212 | 
            +
                              const int64_t i);
         | 
| 213 | 
            +
             | 
| 207 214 | 
             
                VALUE convert(const arrow::UnionArray& array,
         | 
| 208 215 | 
             
                              const int64_t i);
         | 
| 209 216 |  | 
| @@ -234,6 +241,7 @@ namespace red_arrow { | |
| 234 241 | 
             
                std::string decimal_buffer_;
         | 
| 235 242 | 
             
                ListArrayValueConverter* list_array_value_converter_;
         | 
| 236 243 | 
             
                StructArrayValueConverter* struct_array_value_converter_;
         | 
| 244 | 
            +
                MapArrayValueConverter* map_array_value_converter_;
         | 
| 237 245 | 
             
                UnionArrayValueConverter* union_array_value_converter_;
         | 
| 238 246 | 
             
                DictionaryArrayValueConverter* dictionary_array_value_converter_;
         | 
| 239 247 | 
             
              };
         | 
| @@ -294,6 +302,7 @@ namespace red_arrow { | |
| 294 302 | 
             
                // VISIT(Interval)
         | 
| 295 303 | 
             
                VISIT(List)
         | 
| 296 304 | 
             
                VISIT(Struct)
         | 
| 305 | 
            +
                VISIT(Map)
         | 
| 297 306 | 
             
                VISIT(SparseUnion)
         | 
| 298 307 | 
             
                VISIT(DenseUnion)
         | 
| 299 308 | 
             
                VISIT(Dictionary)
         | 
| @@ -399,6 +408,7 @@ namespace red_arrow { | |
| 399 408 | 
             
                // VISIT(Interval)
         | 
| 400 409 | 
             
                VISIT(List)
         | 
| 401 410 | 
             
                VISIT(Struct)
         | 
| 411 | 
            +
                VISIT(Map)
         | 
| 402 412 | 
             
                VISIT(SparseUnion)
         | 
| 403 413 | 
             
                VISIT(DenseUnion)
         | 
| 404 414 | 
             
                VISIT(Dictionary)
         | 
| @@ -431,6 +441,117 @@ namespace red_arrow { | |
| 431 441 | 
             
                VALUE result_;
         | 
| 432 442 | 
             
              };
         | 
| 433 443 |  | 
| 444 | 
            +
              class MapArrayValueConverter : public arrow::ArrayVisitor {
         | 
| 445 | 
            +
              public:
         | 
| 446 | 
            +
                explicit MapArrayValueConverter(ArrayValueConverter* converter)
         | 
| 447 | 
            +
                  : array_value_converter_(converter),
         | 
| 448 | 
            +
                    offset_(0),
         | 
| 449 | 
            +
                    length_(0),
         | 
| 450 | 
            +
                    values_(Qnil) {}
         | 
| 451 | 
            +
             | 
| 452 | 
            +
                VALUE convert(const arrow::MapArray& array,
         | 
| 453 | 
            +
                              const int64_t index) {
         | 
| 454 | 
            +
                  auto key_array = array.keys().get();
         | 
| 455 | 
            +
                  auto item_array = array.items().get();
         | 
| 456 | 
            +
                  auto offset_keep = offset_;
         | 
| 457 | 
            +
                  auto length_keep = length_;
         | 
| 458 | 
            +
                  auto values_keep = values_;
         | 
| 459 | 
            +
                  offset_ = array.value_offset(index);
         | 
| 460 | 
            +
                  length_ = array.value_length(index);
         | 
| 461 | 
            +
                  auto keys = rb_ary_new_capa(length_);
         | 
| 462 | 
            +
                  values_ = keys;
         | 
| 463 | 
            +
                  check_status(key_array->Accept(this),
         | 
| 464 | 
            +
                               "[raw-records][map-array][keys]");
         | 
| 465 | 
            +
                  auto items = rb_ary_new_capa(length_);
         | 
| 466 | 
            +
                  values_ = items;
         | 
| 467 | 
            +
                  check_status(item_array->Accept(this),
         | 
| 468 | 
            +
                               "[raw-records][map-array][items]");
         | 
| 469 | 
            +
                  auto map = rb_hash_new();
         | 
| 470 | 
            +
                  auto n = RARRAY_LEN(keys);
         | 
| 471 | 
            +
                  auto raw_keys = RARRAY_CONST_PTR(keys);
         | 
| 472 | 
            +
                  auto raw_items = RARRAY_CONST_PTR(items);
         | 
| 473 | 
            +
                  for (long i = 0; i < n; ++i) {
         | 
| 474 | 
            +
                    rb_hash_aset(map, raw_keys[i], raw_items[i]);
         | 
| 475 | 
            +
                  }
         | 
| 476 | 
            +
                  offset_ = offset_keep;
         | 
| 477 | 
            +
                  length_ = length_keep;
         | 
| 478 | 
            +
                  values_ = values_keep;
         | 
| 479 | 
            +
                  return map;
         | 
| 480 | 
            +
                }
         | 
| 481 | 
            +
             | 
| 482 | 
            +
            #define VISIT(TYPE)                                                     \
         | 
| 483 | 
            +
                arrow::Status Visit(const arrow::TYPE ## Array& array) override {   \
         | 
| 484 | 
            +
                  return visit_value(array);                                        \
         | 
| 485 | 
            +
                }
         | 
| 486 | 
            +
             | 
| 487 | 
            +
                VISIT(Null)
         | 
| 488 | 
            +
                VISIT(Boolean)
         | 
| 489 | 
            +
                VISIT(Int8)
         | 
| 490 | 
            +
                VISIT(Int16)
         | 
| 491 | 
            +
                VISIT(Int32)
         | 
| 492 | 
            +
                VISIT(Int64)
         | 
| 493 | 
            +
                VISIT(UInt8)
         | 
| 494 | 
            +
                VISIT(UInt16)
         | 
| 495 | 
            +
                VISIT(UInt32)
         | 
| 496 | 
            +
                VISIT(UInt64)
         | 
| 497 | 
            +
                // TODO
         | 
| 498 | 
            +
                // VISIT(HalfFloat)
         | 
| 499 | 
            +
                VISIT(Float)
         | 
| 500 | 
            +
                VISIT(Double)
         | 
| 501 | 
            +
                VISIT(Binary)
         | 
| 502 | 
            +
                VISIT(String)
         | 
| 503 | 
            +
                VISIT(FixedSizeBinary)
         | 
| 504 | 
            +
                VISIT(Date32)
         | 
| 505 | 
            +
                VISIT(Date64)
         | 
| 506 | 
            +
                VISIT(Time32)
         | 
| 507 | 
            +
                VISIT(Time64)
         | 
| 508 | 
            +
                VISIT(Timestamp)
         | 
| 509 | 
            +
                // TODO
         | 
| 510 | 
            +
                // VISIT(Interval)
         | 
| 511 | 
            +
                VISIT(List)
         | 
| 512 | 
            +
                VISIT(Struct)
         | 
| 513 | 
            +
                VISIT(Map)
         | 
| 514 | 
            +
                VISIT(SparseUnion)
         | 
| 515 | 
            +
                VISIT(DenseUnion)
         | 
| 516 | 
            +
                VISIT(Dictionary)
         | 
| 517 | 
            +
                VISIT(Decimal128)
         | 
| 518 | 
            +
                VISIT(Decimal256)
         | 
| 519 | 
            +
                // TODO
         | 
| 520 | 
            +
                // VISIT(Extension)
         | 
| 521 | 
            +
             | 
| 522 | 
            +
            #undef VISIT
         | 
| 523 | 
            +
             | 
| 524 | 
            +
              private:
         | 
| 525 | 
            +
                template <typename ArrayType>
         | 
| 526 | 
            +
                inline VALUE convert_value(const ArrayType& array,
         | 
| 527 | 
            +
                                           const int64_t i) {
         | 
| 528 | 
            +
                  return array_value_converter_->convert(array, i);
         | 
| 529 | 
            +
                }
         | 
| 530 | 
            +
             | 
| 531 | 
            +
                template <typename ArrayType>
         | 
| 532 | 
            +
                arrow::Status visit_value(const ArrayType& array) {
         | 
| 533 | 
            +
                  if (array.null_count() > 0) {
         | 
| 534 | 
            +
                    for (int64_t i = 0; i < length_; ++i) {
         | 
| 535 | 
            +
                      auto value = Qnil;
         | 
| 536 | 
            +
                      if (!array.IsNull(i + offset_)) {
         | 
| 537 | 
            +
                        value = convert_value(array, i + offset_);
         | 
| 538 | 
            +
                      }
         | 
| 539 | 
            +
                      rb_ary_push(values_, value);
         | 
| 540 | 
            +
                    }
         | 
| 541 | 
            +
                  } else {
         | 
| 542 | 
            +
                    for (int64_t i = 0; i < length_; ++i) {
         | 
| 543 | 
            +
                      rb_ary_push(values_, convert_value(array, i + offset_));
         | 
| 544 | 
            +
                    }
         | 
| 545 | 
            +
                  }
         | 
| 546 | 
            +
                  return arrow::Status::OK();
         | 
| 547 | 
            +
                }
         | 
| 548 | 
            +
             | 
| 549 | 
            +
                ArrayValueConverter* array_value_converter_;
         | 
| 550 | 
            +
                int32_t offset_;
         | 
| 551 | 
            +
                int32_t length_;
         | 
| 552 | 
            +
                VALUE values_;
         | 
| 553 | 
            +
              };
         | 
| 554 | 
            +
             | 
| 434 555 | 
             
              class UnionArrayValueConverter : public arrow::ArrayVisitor {
         | 
| 435 556 | 
             
              public:
         | 
| 436 557 | 
             
                explicit UnionArrayValueConverter(ArrayValueConverter* converter)
         | 
| @@ -492,6 +613,7 @@ namespace red_arrow { | |
| 492 613 | 
             
                // VISIT(Interval)
         | 
| 493 614 | 
             
                VISIT(List)
         | 
| 494 615 | 
             
                VISIT(Struct)
         | 
| 616 | 
            +
                VISIT(Map)
         | 
| 495 617 | 
             
                VISIT(SparseUnion)
         | 
| 496 618 | 
             
                VISIT(DenseUnion)
         | 
| 497 619 | 
             
                VISIT(Dictionary)
         | 
| @@ -617,6 +739,7 @@ namespace red_arrow { | |
| 617 739 | 
             
                // VISIT(Interval)
         | 
| 618 740 | 
             
                VISIT(List)
         | 
| 619 741 | 
             
                VISIT(Struct)
         | 
| 742 | 
            +
                VISIT(Map)
         | 
| 620 743 | 
             
                VISIT(SparseUnion)
         | 
| 621 744 | 
             
                VISIT(DenseUnion)
         | 
| 622 745 | 
             
                VISIT(Dictionary)
         | 
| @@ -645,11 +768,13 @@ namespace red_arrow { | |
| 645 768 | 
             
                  : array_value_converter_(),
         | 
| 646 769 | 
             
                    list_array_value_converter_(&array_value_converter_),
         | 
| 647 770 | 
             
                    struct_array_value_converter_(&array_value_converter_),
         | 
| 771 | 
            +
                    map_array_value_converter_(&array_value_converter_),
         | 
| 648 772 | 
             
                    union_array_value_converter_(&array_value_converter_),
         | 
| 649 773 | 
             
                    dictionary_array_value_converter_(&array_value_converter_) {
         | 
| 650 774 | 
             
                  array_value_converter_.
         | 
| 651 775 | 
             
                    set_sub_value_converters(&list_array_value_converter_,
         | 
| 652 776 | 
             
                                             &struct_array_value_converter_,
         | 
| 777 | 
            +
                                             &map_array_value_converter_,
         | 
| 653 778 | 
             
                                             &union_array_value_converter_,
         | 
| 654 779 | 
             
                                             &dictionary_array_value_converter_);
         | 
| 655 780 | 
             
                }
         | 
| @@ -663,6 +788,7 @@ namespace red_arrow { | |
| 663 788 | 
             
                ArrayValueConverter array_value_converter_;
         | 
| 664 789 | 
             
                ListArrayValueConverter list_array_value_converter_;
         | 
| 665 790 | 
             
                StructArrayValueConverter struct_array_value_converter_;
         | 
| 791 | 
            +
                MapArrayValueConverter map_array_value_converter_;
         | 
| 666 792 | 
             
                UnionArrayValueConverter union_array_value_converter_;
         | 
| 667 793 | 
             
                DictionaryArrayValueConverter dictionary_array_value_converter_;
         | 
| 668 794 | 
             
              };
         | 
    
        data/ext/arrow/extconf.rb
    CHANGED
    
    | @@ -25,6 +25,19 @@ if arrow_pkg_config_path | |
| 25 25 | 
             
              ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
         | 
| 26 26 | 
             
            end
         | 
| 27 27 |  | 
| 28 | 
            +
            checking_for(checking_message("Homebrew")) do
         | 
| 29 | 
            +
              platform = NativePackageInstaller::Platform.detect
         | 
| 30 | 
            +
              if platform.is_a?(NativePackageInstaller::Platform::Homebrew)
         | 
| 31 | 
            +
                openssl_prefix = `brew --prefix openssl@1.1`.chomp
         | 
| 32 | 
            +
                unless openssl_prefix.empty?
         | 
| 33 | 
            +
                  PKGConfig.add_path("#{openssl_prefix}/lib/pkgconfig")
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
                true
         | 
| 36 | 
            +
              else
         | 
| 37 | 
            +
                false
         | 
| 38 | 
            +
              end
         | 
| 39 | 
            +
            end
         | 
| 40 | 
            +
             | 
| 28 41 | 
             
            unless required_pkg_config_package([
         | 
| 29 42 | 
             
                                                 "arrow",
         | 
| 30 43 | 
             
                                                 Arrow::Version::MAJOR,
         | 
    
        data/ext/arrow/raw-records.cpp
    CHANGED
    
    
    
        data/ext/arrow/values.cpp
    CHANGED
    
    
| @@ -0,0 +1,35 @@ | |
| 1 | 
            +
            # Licensed to the Apache Software Foundation (ASF) under one
         | 
| 2 | 
            +
            # or more contributor license agreements.  See the NOTICE file
         | 
| 3 | 
            +
            # distributed with this work for additional information
         | 
| 4 | 
            +
            # regarding copyright ownership.  The ASF licenses this file
         | 
| 5 | 
            +
            # to you under the Apache License, Version 2.0 (the
         | 
| 6 | 
            +
            # "License"); you may not use this file except in compliance
         | 
| 7 | 
            +
            # with the License.  You may obtain a copy of the License at
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   http://www.apache.org/licenses/LICENSE-2.0
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # Unless required by applicable law or agreed to in writing,
         | 
| 12 | 
            +
            # software distributed under the License is distributed on an
         | 
| 13 | 
            +
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 14 | 
            +
            # KIND, either express or implied.  See the License for the
         | 
| 15 | 
            +
            # specific language governing permissions and limitations
         | 
| 16 | 
            +
            # under the License.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            module Arrow
         | 
| 19 | 
            +
              class AggregateNodeOptions
         | 
| 20 | 
            +
                class << self
         | 
| 21 | 
            +
                  # @api private
         | 
| 22 | 
            +
                  def try_convert(value)
         | 
| 23 | 
            +
                    case value
         | 
| 24 | 
            +
                    when Hash
         | 
| 25 | 
            +
                      aggregations = value[:aggregations]
         | 
| 26 | 
            +
                      return nil if aggregations.nil?
         | 
| 27 | 
            +
                      keys = value[:keys]
         | 
| 28 | 
            +
                      new(aggregations, keys)
         | 
| 29 | 
            +
                    else
         | 
| 30 | 
            +
                      nil
         | 
| 31 | 
            +
                    end
         | 
| 32 | 
            +
                  end
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
              end
         | 
| 35 | 
            +
            end
         | 
| @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            # Licensed to the Apache Software Foundation (ASF) under one
         | 
| 2 | 
            +
            # or more contributor license agreements.  See the NOTICE file
         | 
| 3 | 
            +
            # distributed with this work for additional information
         | 
| 4 | 
            +
            # regarding copyright ownership.  The ASF licenses this file
         | 
| 5 | 
            +
            # to you under the Apache License, Version 2.0 (the
         | 
| 6 | 
            +
            # "License"); you may not use this file except in compliance
         | 
| 7 | 
            +
            # with the License.  You may obtain a copy of the License at
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   http://www.apache.org/licenses/LICENSE-2.0
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # Unless required by applicable law or agreed to in writing,
         | 
| 12 | 
            +
            # software distributed under the License is distributed on an
         | 
| 13 | 
            +
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 14 | 
            +
            # KIND, either express or implied.  See the License for the
         | 
| 15 | 
            +
            # specific language governing permissions and limitations
         | 
| 16 | 
            +
            # under the License.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            module Arrow
         | 
| 19 | 
            +
              class Aggregation
                class << self
                  # @api private
                  #
                  # Builds an aggregation from a Hash description.
                  #
                  # The Hash is read through the `:function`, `:options`, `:input`
                  # and `:output` keys. `:function` may be a String or a Symbol and
                  # gets a `hash_` prefix when it doesn't already have one. When
                  # `:output` is missing it defaults to `"function(input)"`, using
                  # the function name without the `hash_` prefix.
                  #
                  # @param value [Object] The object to be converted.
                  # @return [Aggregation, nil] The aggregation created by
                  #   `new(function, options, input, output)`, or `nil` when `value`
                  #   isn't a Hash, `:function` isn't a String/Symbol or `:input`
                  #   is missing.
                  def try_convert(value)
                    return nil unless value.is_a?(Hash)

                    name = value[:function]
                    name = name.to_s if name.is_a?(Symbol)
                    # A missing (nil) function is rejected here too.
                    return nil unless name.is_a?(String)

                    # TODO: Improve this when we have non hash based aggregate function
                    qualified_name = name.start_with?("hash_") ? name : "hash_#{name}"

                    aggregate_options = value[:options]
                    target = value[:input]
                    return nil if target.nil?

                    label = value[:output]
                    # Default label: the function name without the "hash_" prefix
                    # applied to the input.
                    label = "#{qualified_name.sub(/\Ahash_/, "")}(#{target})" if label.nil?

                    new(qualified_name, aggregate_options, target, label)
                  end
                end
              end
         | 
| 46 | 
            +
            end
         | 
    
        data/lib/arrow/array-builder.rb
    CHANGED
    
    
| @@ -0,0 +1,27 @@ | |
| 1 | 
            +
            # Licensed to the Apache Software Foundation (ASF) under one
         | 
| 2 | 
            +
            # or more contributor license agreements.  See the NOTICE file
         | 
| 3 | 
            +
            # distributed with this work for additional information
         | 
| 4 | 
            +
            # regarding copyright ownership.  The ASF licenses this file
         | 
| 5 | 
            +
            # to you under the Apache License, Version 2.0 (the
         | 
| 6 | 
            +
            # "License"); you may not use this file except in compliance
         | 
| 7 | 
            +
            # with the License.  You may obtain a copy of the License at
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   http://www.apache.org/licenses/LICENSE-2.0
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # Unless required by applicable law or agreed to in writing,
         | 
| 12 | 
            +
            # software distributed under the License is distributed on an
         | 
| 13 | 
            +
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 14 | 
            +
            # KIND, either express or implied.  See the License for the
         | 
| 15 | 
            +
            # specific language governing permissions and limitations
         | 
| 16 | 
            +
            # under the License.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            module Arrow
         | 
| 19 | 
            +
              class BinaryDictionaryArrayBuilder
                include SymbolValuesAppendable

                # Creates the builder for the values of this dictionary builder.
                #
                # NOTE(review): presumably a hook called by SymbolValuesAppendable;
                # confirm against that module.
                #
                # @return [BinaryArrayBuilder] A new binary array builder.
                def create_values_array_builder
                  BinaryArrayBuilder.new
                end
                private :create_values_array_builder
              end
         | 
| 27 | 
            +
            end
         | 
| @@ -27,6 +27,17 @@ module Arrow | |
| 27 27 | 
             
                  columns.each(&block)
         | 
| 28 28 | 
             
                end
         | 
| 29 29 |  | 
| 30 | 
            +
                # @overload find_column(name)
         | 
| 31 | 
            +
                #   Find a column that has the given name.
         | 
| 32 | 
            +
                #
         | 
| 33 | 
            +
                #   @param name [String, Symbol] The column name to be found.
         | 
| 34 | 
            +
                #   @return [Column] The found column.
         | 
| 35 | 
            +
                #
         | 
| 36 | 
            +
                # @overload find_column(index)
         | 
| 37 | 
            +
                #   Find the `index`-th column.
         | 
| 38 | 
            +
                #
         | 
| 39 | 
            +
                #   @param index [Integer] The index to be found.
         | 
| 40 | 
            +
                #   @return [Column] The found column.
         | 
| 30 41 | 
             
                def find_column(name_or_index)
         | 
| 31 42 | 
             
                  case name_or_index
         | 
| 32 43 | 
             
                  when String, Symbol
         | 
| @@ -40,9 +51,97 @@ module Arrow | |
| 40 51 | 
             
                    return nil if index < 0 or index >= n_columns
         | 
| 41 52 | 
             
                    Column.new(self, index)
         | 
| 42 53 | 
             
                  else
         | 
| 43 | 
            -
                    message = "column name or index must be String, Symbol or Integer"
         | 
| 54 | 
            +
                    message = "column name or index must be String, Symbol or Integer: "
         | 
| 55 | 
            +
                    message << name_or_index.inspect
         | 
| 44 56 | 
             
                    raise ArgumentError, message
         | 
| 45 57 | 
             
                  end
         | 
| 46 58 | 
             
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                # Selects columns that are selected by `selectors` and/or `block`
         | 
| 61 | 
            +
                # and creates a new container only with the selected columns.
         | 
| 62 | 
            +
                #
         | 
| 63 | 
            +
                # @param selectors [Array<String, Symbol, Integer, Range>]
         | 
| 64 | 
            +
                #   If a selector is `String`, `Symbol` or `Integer`, the selector
         | 
| 65 | 
            +
                #   selects a column by {#find_column}.
         | 
| 66 | 
            +
                #
         | 
| 67 | 
            +
                #   If a selector is `Range`, the selector selects columns by `::Array#[]`.
         | 
| 68 | 
            +
                # @yield [column] Gives a column to the block to select columns.
         | 
| 69 | 
            +
                #   This uses `::Array#select`.
         | 
| 70 | 
            +
                # @yieldparam column [Column] A target column.
         | 
| 71 | 
            +
                # @yieldreturn [Boolean] Whether the given column is selected or not.
         | 
| 72 | 
            +
                # @return [self.class] The newly created container that only has selected
         | 
| 73 | 
            +
                #   columns.
         | 
| 74 | 
            +
                # @raise [KeyError] If a `String` or `Symbol` selector doesn't match
                #   any column.
                # @raise [IndexError] If an `Integer` selector, or the start of a
                #   `Range` selector, is outside the columns.
                def select_columns(*selectors, &block)
                  if selectors.empty?
                    # Nothing to select by: hand back an enumerator so that the
                    # caller can chain a block later.
                    return to_enum(__method__) unless block_given?
                    selected_columns = columns.select(&block)
                  else
                    selected_columns = []
                    selectors.each do |selector|
                      case selector
                      when Range
                        # ::Array#[] returns nil (not []) when the range starts
                        # outside the array. Report it like an out-of-range
                        # Integer selector instead of failing in #concat with
                        # an unrelated TypeError.
                        ranged_columns = columns[selector]
                        if ranged_columns.nil?
                          message = "out of index (0..#{n_columns - 1}): " +
                                    "#{selector.inspect}: #{inspect}"
                          raise IndexError.new(message)
                        end
                        selected_columns.concat(ranged_columns)
                      else
                        column = find_column(selector)
                        if column.nil?
                          case selector
                          when String, Symbol
                            message = "unknown column: #{selector.inspect}: #{inspect}"
                            raise KeyError.new(message)
                          else
                            message = "out of index (0..#{n_columns - 1}): " +
                                      "#{selector.inspect}: #{inspect}"
                            raise IndexError.new(message)
                          end
                        end
                        selected_columns << column
                      end
                    end
                    # With both selectors and a block, the block narrows down
                    # the columns picked by the selectors.
                    selected_columns = selected_columns.select(&block) if block_given?
                  end
                  self.class.new(selected_columns)
                end
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                # @overload [](name)
         | 
| 106 | 
            +
                #   Find a column that has the given name.
         | 
| 107 | 
            +
                #
         | 
| 108 | 
            +
                #   @param name [String, Symbol] The column name to be found.
         | 
| 109 | 
            +
                #   @return [Column] The found column.
         | 
| 110 | 
            +
                #   @see #find_column
         | 
| 111 | 
            +
                #
         | 
| 112 | 
            +
                # @overload [](index)
         | 
| 113 | 
            +
                #   Find the `index`-th column.
         | 
| 114 | 
            +
                #
         | 
| 115 | 
            +
                #   @param index [Integer] The index to be found.
         | 
| 116 | 
            +
                #   @return [Column] The found column.
         | 
| 117 | 
            +
                #   @see #find_column
         | 
| 118 | 
            +
                #
         | 
| 119 | 
            +
                # @overload [](range)
         | 
| 120 | 
            +
                #   Selects columns that are in `range` and creates a new container
         | 
| 121 | 
            +
                #   only with the selected columns.
         | 
| 122 | 
            +
                #
         | 
| 123 | 
            +
                #   @param range [Range] The range to be selected.
         | 
| 124 | 
            +
                #   @return [self.class] The newly created container that only has selected
         | 
| 125 | 
            +
                #     columns.
         | 
| 126 | 
            +
                #   @see #select_columns
         | 
| 127 | 
            +
                #
         | 
| 128 | 
            +
                # @overload [](selectors)
         | 
| 129 | 
            +
                #   Selects columns that are selected by `selectors` and creates a
         | 
| 130 | 
            +
                #   new container only with the selected columns.
         | 
| 131 | 
            +
                #
         | 
| 132 | 
            +
                #   @param selectors [Array] The selectors that are used to select columns.
         | 
| 133 | 
            +
                #   @return [self.class] The newly created container that only has selected
         | 
| 134 | 
            +
                #     columns.
         | 
| 135 | 
            +
                #   @see #select_columns
         | 
| 136 | 
            +
                def [](selector)
                  # An Array of selectors or a Range builds a new container via
                  # #select_columns; anything else looks up a single column.
                  return select_columns(*selector) if selector.is_a?(::Array)
                  return select_columns(selector) if selector.is_a?(Range)

                  find_column(selector)
                end
         | 
| 47 146 | 
             
              end
         | 
| 48 147 | 
             
            end
         | 
    
        data/lib/arrow/datum.rb
    CHANGED
    
    
| @@ -0,0 +1,48 @@ | |
| 1 | 
            +
            # Licensed to the Apache Software Foundation (ASF) under one
         | 
| 2 | 
            +
            # or more contributor license agreements.  See the NOTICE file
         | 
| 3 | 
            +
            # distributed with this work for additional information
         | 
| 4 | 
            +
            # regarding copyright ownership.  The ASF licenses this file
         | 
| 5 | 
            +
            # to you under the Apache License, Version 2.0 (the
         | 
| 6 | 
            +
            # "License"); you may not use this file except in compliance
         | 
| 7 | 
            +
            # with the License.  You may obtain a copy of the License at
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   http://www.apache.org/licenses/LICENSE-2.0
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # Unless required by applicable law or agreed to in writing,
         | 
| 12 | 
            +
            # software distributed under the License is distributed on an
         | 
| 13 | 
            +
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 14 | 
            +
            # KIND, either express or implied.  See the License for the
         | 
| 15 | 
            +
            # specific language governing permissions and limitations
         | 
| 16 | 
            +
            # under the License.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            module Arrow
         | 
| 19 | 
            +
              class Expression
                class << self
                  # @api private
                  #
                  # Converts a plain Ruby object into an expression.
                  #
                  # * A Symbol becomes a `FieldExpression` for the symbol's name.
                  # * An Array is read as `[function_name, *arguments]` and becomes
                  #   a `CallExpression`. If the last argument is a
                  #   `FunctionOptions`, it is passed as the options instead of as
                  #   an argument.
                  # * Anything else becomes a `LiteralExpression` when
                  #   `Datum.try_convert` can convert it.
                  #
                  # @param value [Object] The object to be converted.
                  # @return [Expression, nil] The converted expression, or `nil`
                  #   when the function name is neither String nor Symbol, or when
                  #   the value can't be converted to a datum.
                  def try_convert(value)
                    if value.is_a?(Symbol)
                      FieldExpression.new(value.to_s)
                    elsif value.is_a?(::Array)
                      name, *call_arguments = value
                      return nil unless name.is_a?(String) || name.is_a?(Symbol)

                      # Trailing FunctionOptions are options, not an argument.
                      call_options =
                        call_arguments.last.is_a?(FunctionOptions) ? call_arguments.pop : nil
                      CallExpression.new(name.to_s, call_arguments, call_options)
                    else
                      literal = Datum.try_convert(value)
                      literal.nil? ? nil : LiteralExpression.new(literal)
                    end
                  end
                end
              end
         | 
| 48 | 
            +
            end
         | 
| @@ -0,0 +1,34 @@ | |
| 1 | 
            +
            # Licensed to the Apache Software Foundation (ASF) under one
         | 
| 2 | 
            +
            # or more contributor license agreements.  See the NOTICE file
         | 
| 3 | 
            +
            # distributed with this work for additional information
         | 
| 4 | 
            +
            # regarding copyright ownership.  The ASF licenses this file
         | 
| 5 | 
            +
            # to you under the Apache License, Version 2.0 (the
         | 
| 6 | 
            +
            # "License"); you may not use this file except in compliance
         | 
| 7 | 
            +
            # with the License.  You may obtain a copy of the License at
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   http://www.apache.org/licenses/LICENSE-2.0
         | 
| 10 | 
            +
            #
         | 
| 11 | 
            +
            # Unless required by applicable law or agreed to in writing,
         | 
| 12 | 
            +
            # software distributed under the License is distributed on an
         | 
| 13 | 
            +
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 14 | 
            +
            # KIND, either express or implied.  See the License for the
         | 
| 15 | 
            +
            # specific language governing permissions and limitations
         | 
| 16 | 
            +
            # under the License.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            module Arrow
         | 
| 19 | 
            +
              class FileSystem
                # Keep the original implementation reachable under a `_raw` name
                # so that the wrapper below can add block support on top of it.
                alias_method :open_output_stream_raw, :open_output_stream

                # Opens an output stream for `path`.
                #
                # @param path [Object] Passed as is to the original
                #   `open_output_stream`.
                # @yield [stream] If a block is given, the opened stream is passed
                #   to it and closed when the block finishes, even if the block
                #   raises.
                # @return [Object] The block's result when a block is given, the
                #   opened stream otherwise.
                def open_output_stream(path)
                  output = open_output_stream_raw(path)
                  return output unless block_given?

                  begin
                    yield(output)
                  ensure
                    output.close
                  end
                end
              end
         | 
| 34 | 
            +
            end
         |