red-arrow 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +1 -3
- data/ext/arrow/arrow.cpp +4 -0
- data/ext/arrow/{record-batch.cpp → raw-records.cpp} +49 -5
- data/ext/arrow/red-arrow.hpp +1 -0
- data/lib/arrow/dictionary-data-type.rb +23 -12
- data/lib/arrow/table.rb +228 -77
- data/lib/arrow/version.rb +1 -1
- data/test/raw-records/test-basic-arrays.rb +340 -0
- data/test/raw-records/test-dense-union-array.rb +492 -0
- data/test/raw-records/test-list-array.rb +520 -0
- data/test/raw-records/{record-batch/test-multiple-columns.rb → test-multiple-columns.rb} +34 -18
- data/test/raw-records/test-sparse-union-array.rb +480 -0
- data/test/raw-records/test-struct-array.rb +448 -0
- data/test/raw-records/test-table.rb +47 -0
- data/test/test-dictionary-data-type.rb +3 -3
- data/test/test-table.rb +40 -14
- metadata +59 -57
- data/test/raw-records/record-batch/test-basic-arrays.rb +0 -349
- data/test/raw-records/record-batch/test-dense-union-array.rb +0 -486
- data/test/raw-records/record-batch/test-list-array.rb +0 -498
- data/test/raw-records/record-batch/test-sparse-union-array.rb +0 -474
- data/test/raw-records/record-batch/test-struct-array.rb +0 -426
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97877568cac79133ccac62dcbb51f2e8466c2fb825aaaf58f65976742257fbdc
|
4
|
+
data.tar.gz: 5736802a2d3f5539a2a75421d215aa6bd2a4827ce6da6533cf440c88513db006
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bf77b7050ee8ad28f6f2bf99186684b33385cbca51bdc39650441875ab7e0701803b05beb34e75cf9a6ab7a6c4b31b6fb053b4b83d14bf6d205018b84e3d246
|
7
|
+
data.tar.gz: 9be3f95a220af905a2c611a6a4742db395441d17d2daf20ea975cec2826bf5dc45319f70fce2536672649a4d3d1a3c499fc3b1f45207764b8500dbecc671613a
|
data/README.md
CHANGED
@@ -33,9 +33,7 @@ gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow
|
|
33
33
|
|
34
34
|
## Install
|
35
35
|
|
36
|
-
Install Apache Arrow GLib before install Red Arrow.
|
37
|
-
|
38
|
-
Note that the Apache Arrow GLib packages are "unofficial". "Official" packages will be released in the future.
|
36
|
+
Install Apache Arrow GLib before installing Red Arrow. See the [Apache Arrow install document](https://arrow.apache.org/install/) for details.
|
39
37
|
|
40
38
|
Install Red Arrow after you install Apache Arrow GLib:
|
41
39
|
|
data/ext/arrow/arrow.cpp
CHANGED
@@ -34,6 +34,10 @@ extern "C" void Init_arrow() {
|
|
34
34
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
35
35
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
36
36
|
0);
|
37
|
+
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
38
|
+
rb_define_method(cArrowTable, "raw_records",
|
39
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
40
|
+
0);
|
37
41
|
|
38
42
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
39
43
|
|
@@ -642,11 +642,34 @@ namespace red_arrow {
|
|
642
642
|
auto record = rb_ary_new_capa(n_columns_);
|
643
643
|
rb_ary_push(records_, record);
|
644
644
|
}
|
645
|
+
row_offset_ = 0;
|
645
646
|
for (int i = 0; i < n_columns_; ++i) {
|
646
647
|
const auto array = record_batch.column(i).get();
|
647
648
|
column_index_ = i;
|
648
649
|
check_status(array->Accept(this),
|
649
|
-
"[raw-records]");
|
650
|
+
"[record-batch][raw-records]");
|
651
|
+
}
|
652
|
+
return Qnil;
|
653
|
+
});
|
654
|
+
}
|
655
|
+
|
656
|
+
void build(const arrow::Table& table) {
|
657
|
+
rb::protect([&] {
|
658
|
+
const auto n_rows = table.num_rows();
|
659
|
+
for (int64_t i = 0; i < n_rows; ++i) {
|
660
|
+
auto record = rb_ary_new_capa(n_columns_);
|
661
|
+
rb_ary_push(records_, record);
|
662
|
+
}
|
663
|
+
for (int i = 0; i < n_columns_; ++i) {
|
664
|
+
const auto column = table.column(i).get();
|
665
|
+
const auto chunked_array = column->data();
|
666
|
+
column_index_ = i;
|
667
|
+
row_offset_ = 0;
|
668
|
+
for (const auto array : chunked_array->chunks()) {
|
669
|
+
check_status(array->Accept(this),
|
670
|
+
"[table][raw-records]");
|
671
|
+
row_offset_ += array->length();
|
672
|
+
}
|
650
673
|
}
|
651
674
|
return Qnil;
|
652
675
|
});
|
@@ -703,17 +726,17 @@ namespace red_arrow {
|
|
703
726
|
void convert(const ArrayType& array) {
|
704
727
|
const auto n = array.length();
|
705
728
|
if (array.null_count() > 0) {
|
706
|
-
for (int64_t i = 0; i < n; ++i) {
|
729
|
+
for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
|
707
730
|
auto value = Qnil;
|
708
731
|
if (!array.IsNull(i)) {
|
709
732
|
value = convert_value(array, i);
|
710
733
|
}
|
711
|
-
auto record = rb_ary_entry(records_,
|
734
|
+
auto record = rb_ary_entry(records_, ii);
|
712
735
|
rb_ary_store(record, column_index_, value);
|
713
736
|
}
|
714
737
|
} else {
|
715
|
-
for (int64_t i = 0; i < n; ++i) {
|
716
|
-
auto record = rb_ary_entry(records_,
|
738
|
+
for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
|
739
|
+
auto record = rb_ary_entry(records_, ii);
|
717
740
|
rb_ary_store(record, column_index_, convert_value(array, i));
|
718
741
|
}
|
719
742
|
}
|
@@ -731,6 +754,9 @@ namespace red_arrow {
|
|
731
754
|
// The current column index.
|
732
755
|
int column_index_;
|
733
756
|
|
757
|
+
// The current row offset.
|
758
|
+
int64_t row_offset_;
|
759
|
+
|
734
760
|
// The number of columns.
|
735
761
|
const int n_columns_;
|
736
762
|
};
|
@@ -753,4 +779,22 @@ namespace red_arrow {
|
|
753
779
|
|
754
780
|
return records;
|
755
781
|
}
|
782
|
+
|
783
|
+
VALUE
|
784
|
+
table_raw_records(VALUE rb_table) {
|
785
|
+
auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
|
786
|
+
auto table = garrow_table_get_raw(garrow_table).get();
|
787
|
+
const auto n_rows = table->num_rows();
|
788
|
+
const auto n_columns = table->num_columns();
|
789
|
+
auto records = rb_ary_new_capa(n_rows);
|
790
|
+
|
791
|
+
try {
|
792
|
+
RawRecordsBuilder builder(records, n_columns);
|
793
|
+
builder.build(*table);
|
794
|
+
} catch (rb::State& state) {
|
795
|
+
state.jump();
|
796
|
+
}
|
797
|
+
|
798
|
+
return records;
|
799
|
+
}
|
756
800
|
}
|
data/ext/arrow/red-arrow.hpp
CHANGED
@@ -22,7 +22,7 @@ module Arrow
|
|
22
22
|
|
23
23
|
# Creates a new {Arrow::DictionaryDataType}.
|
24
24
|
#
|
25
|
-
# @overload initialize(index_data_type,
|
25
|
+
# @overload initialize(index_data_type, value_data_type, ordered)
|
26
26
|
#
|
27
27
|
# @param index_data_type [Arrow::DataType, Hash, String, Symbol]
|
28
28
|
# The index data type of the dictionary data type. It must be
|
@@ -39,18 +39,23 @@ module Arrow
|
|
39
39
|
# See {Arrow::DataType.resolve} how to specify data type
|
40
40
|
# description.
|
41
41
|
#
|
42
|
-
# @param
|
43
|
-
# dictionary data type.
|
42
|
+
# @param value_data_type [Arrow::DataType, Hash, String, Symbol]
|
43
|
+
# The value data type of the dictionary data type.
|
44
|
+
#
|
45
|
+
# You can specify data type as a description by `Hash`.
|
46
|
+
#
|
47
|
+
# See {Arrow::DataType.resolve} how to specify data type
|
48
|
+
# description.
|
44
49
|
#
|
45
50
|
# @param ordered [Boolean] Whether dictionary contents are
|
46
51
|
# ordered or not.
|
47
52
|
#
|
48
53
|
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
49
54
|
# index_data_type = :int8
|
50
|
-
#
|
55
|
+
# value_data_type = :string
|
51
56
|
# ordered = true
|
52
57
|
# Arrow::DictionaryDataType.new(index_data_type,
|
53
|
-
#
|
58
|
+
# value_data_type,
|
54
59
|
# ordered)
|
55
60
|
#
|
56
61
|
# @overload initialize(description)
|
@@ -74,16 +79,21 @@ module Arrow
|
|
74
79
|
# See {Arrow::DataType.resolve} how to specify data type
|
75
80
|
# description.
|
76
81
|
#
|
77
|
-
# @option description [Arrow::
|
78
|
-
#
|
82
|
+
# @option description [Arrow::DataType, Hash, String, Symbol]
|
83
|
+
# :value_data_type
|
84
|
+
# The value data type of the dictionary data type.
|
85
|
+
#
|
86
|
+
# You can specify data type as a description by `Hash`.
|
87
|
+
#
|
88
|
+
# See {Arrow::DataType.resolve} how to specify data type
|
89
|
+
# description.
|
79
90
|
#
|
80
91
|
# @option description [Boolean] :ordered Whether dictionary
|
81
92
|
# contents are ordered or not.
|
82
93
|
#
|
83
94
|
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
84
|
-
# dictionary = Arrow::StringArray.new(["Hello", "World"])
|
85
95
|
# Arrow::DictionaryDataType.new(index_data_type: :int8,
|
86
|
-
#
|
96
|
+
# value_data_type: :string,
|
87
97
|
# ordered: true)
|
88
98
|
def initialize(*args)
|
89
99
|
n_args = args.size
|
@@ -91,16 +101,17 @@ module Arrow
|
|
91
101
|
when 1
|
92
102
|
description = args[0]
|
93
103
|
index_data_type = description[:index_data_type]
|
94
|
-
|
104
|
+
value_data_type = description[:value_data_type]
|
95
105
|
ordered = description[:ordered]
|
96
106
|
when 3
|
97
|
-
index_data_type,
|
107
|
+
index_data_type, value_data_type, ordered = args
|
98
108
|
else
|
99
109
|
message = "wrong number of arguments (given, #{n_args}, expected 1 or 3)"
|
100
110
|
raise ArgumentError, message
|
101
111
|
end
|
102
112
|
index_data_type = DataType.resolve(index_data_type)
|
103
|
-
|
113
|
+
value_data_type = DataType.resolve(value_data_type)
|
114
|
+
initialize_raw(index_data_type, value_data_type, ordered)
|
104
115
|
end
|
105
116
|
end
|
106
117
|
end
|
data/lib/arrow/table.rb
CHANGED
@@ -30,27 +30,154 @@ module Arrow
|
|
30
30
|
|
31
31
|
alias_method :initialize_raw, :initialize
|
32
32
|
private :initialize_raw
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
|
34
|
+
# Creates a new {Arrow::Table}.
|
35
|
+
#
|
36
|
+
# @overload initialize(columns)
|
37
|
+
#
|
38
|
+
# @param columns [::Array<Arrow::Column>] The columns of the table.
|
39
|
+
#
|
40
|
+
# @example Create a table from columns
|
41
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
42
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
43
|
+
# count_column = Arrow::Column.new(count_field, count_array)
|
44
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
45
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
46
|
+
# visible_column = Arrow::Column.new(visible_field, visible_array)
|
47
|
+
# Arrow::Table.new([count_column, visible_column])
|
48
|
+
#
|
49
|
+
# @overload initialize(raw_table)
|
50
|
+
#
|
51
|
+
# @param raw_table [Hash<String, Arrow::Array>]
|
52
|
+
# The pairs of column name and values of the table. Each column value is an
|
53
|
+
# `Arrow::Array`.
|
54
|
+
#
|
55
|
+
# @example Create a table from column name and values
|
56
|
+
# Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
|
57
|
+
# "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
|
58
|
+
#
|
59
|
+
# @overload initialize(raw_table)
|
60
|
+
#
|
61
|
+
# @param raw_table [Hash<String, Arrow::ChunkedArray>]
|
62
|
+
# The pairs of column name and values of the table. Each column value is an
|
63
|
+
# `Arrow::ChunkedArray`.
|
64
|
+
#
|
65
|
+
# @example Create a table from column name and values
|
66
|
+
# count_chunks = [
|
67
|
+
# Arrow::UInt32Array.new([0, 2]),
|
68
|
+
# Arrow::UInt32Array.new([nil, 4]),
|
69
|
+
# ]
|
70
|
+
# visible_chunks = [
|
71
|
+
# Arrow::BooleanArray.new([true]),
|
72
|
+
# Arrow::BooleanArray.new([nil, nil, false]),
|
73
|
+
# ]
|
74
|
+
# Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
|
75
|
+
# "visible" => Arrow::ChunkedArray.new(visible_chunks))
|
76
|
+
#
|
77
|
+
# @overload initialize(schema, columns)
|
78
|
+
#
|
79
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
80
|
+
# You can also specify schema as primitive Ruby objects.
|
81
|
+
# See {Arrow::Schema#initialize} for details.
|
82
|
+
#
|
83
|
+
# @param columns [::Array<Arrow::Column>] The data of the table.
|
84
|
+
#
|
85
|
+
# @example Create a table from schema and columns
|
86
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
87
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
88
|
+
# count_column = Arrow::Column.new(count_field, count_array)
|
89
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
90
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
91
|
+
# visible_column = Arrow::Column.new(visible_field, visible_array)
|
92
|
+
# Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
|
93
|
+
# [count_column, visible_column])
|
94
|
+
#
|
95
|
+
# @overload initialize(schema, arrays)
|
96
|
+
#
|
97
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
98
|
+
# You can also specify schema as primitive Ruby objects.
|
99
|
+
# See {Arrow::Schema#initialize} for details.
|
100
|
+
#
|
101
|
+
# @param arrays [::Array<Arrow::Array>] The data of the table.
|
102
|
+
#
|
103
|
+
# @example Create a table from schema and arrays
|
104
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
105
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
106
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
107
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
108
|
+
# Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
|
109
|
+
# [count_array, visible_array])
|
110
|
+
#
|
111
|
+
# @overload initialize(schema, record_batches)
|
112
|
+
#
|
113
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
114
|
+
# You can also specify schema as primitive Ruby objects.
|
115
|
+
# See {Arrow::Schema#initialize} for details.
|
116
|
+
#
|
117
|
+
# @param record_batches [::Array<Arrow::RecordBatch>] The data of the table.
|
118
|
+
#
|
119
|
+
# @example Create a table from schema and record batches
|
120
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
121
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
122
|
+
# schema = Arrow::Schema.new([count_field, visible_field])
|
123
|
+
# record_batches = [
|
124
|
+
# Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
|
125
|
+
# Arrow::RecordBatch.new(schema, [[4, false]]),
|
126
|
+
# ]
|
127
|
+
# Arrow::Table.new(schema, record_batches)
|
128
|
+
#
|
129
|
+
# @overload initialize(schema, raw_records)
|
130
|
+
#
|
131
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
132
|
+
# You can also specify schema as primitive Ruby objects.
|
133
|
+
# See {Arrow::Schema#initialize} for details.
|
134
|
+
#
|
135
|
+
# @param raw_records [::Array<::Array>] The data of the table as primitive
|
136
|
+
# Ruby objects.
|
137
|
+
#
|
138
|
+
# @example Create a table from schema and raw records
|
139
|
+
# schema = {
|
140
|
+
# count: :uint32,
|
141
|
+
# visible: :boolean,
|
142
|
+
# }
|
143
|
+
# raw_records = [
|
144
|
+
# [0, true],
|
145
|
+
# [2, nil],
|
146
|
+
# [nil, nil],
|
147
|
+
# [4, false],
|
148
|
+
# ]
|
149
|
+
# Arrow::Table.new(schema, raw_records)
|
150
|
+
def initialize(*args)
|
151
|
+
n_args = args.size
|
152
|
+
case n_args
|
153
|
+
when 1
|
154
|
+
if args[0][0].is_a?(Column)
|
155
|
+
values = args[0]
|
156
|
+
fields = values.collect(&:field)
|
38
157
|
schema = Schema.new(fields)
|
39
158
|
else
|
40
|
-
raw_table =
|
159
|
+
raw_table = args[0]
|
41
160
|
fields = []
|
42
|
-
|
161
|
+
values = []
|
43
162
|
raw_table.each do |name, array|
|
44
163
|
field = Field.new(name.to_s, array.value_data_type)
|
45
164
|
fields << field
|
46
|
-
|
165
|
+
values << Column.new(field, array)
|
47
166
|
end
|
48
167
|
schema = Schema.new(fields)
|
49
168
|
end
|
169
|
+
when 2
|
170
|
+
schema = args[0]
|
171
|
+
schema = Schema.new(schema) unless schema.is_a?(Schema)
|
172
|
+
values = args[1]
|
173
|
+
if values[0].is_a?(::Array)
|
174
|
+
values = [RecordBatch.new(schema, values)]
|
175
|
+
end
|
50
176
|
else
|
51
|
-
|
177
|
+
message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
|
178
|
+
raise ArgumentError, message
|
52
179
|
end
|
53
|
-
initialize_raw(schema,
|
180
|
+
initialize_raw(schema, values)
|
54
181
|
end
|
55
182
|
|
56
183
|
def columns
|
@@ -71,43 +198,92 @@ module Arrow
|
|
71
198
|
|
72
199
|
alias_method :[], :find_column
|
73
200
|
|
74
|
-
|
201
|
+
alias_method :slice_raw, :slice
|
202
|
+
|
203
|
+
# @overload slice(offset, length)
|
75
204
|
#
|
76
|
-
#
|
205
|
+
# @param offset [Integer] The offset of sub Arrow::Table.
|
206
|
+
# @param length [Integer] The length of sub Arrow::Table.
|
207
|
+
# @return [Arrow::Table]
|
208
|
+
# The sub `Arrow::Table` that covers only from
|
209
|
+
# `offset` to `offset + length` range.
|
210
|
+
#
|
211
|
+
# @overload slice(index)
|
212
|
+
#
|
213
|
+
# @param index [Integer] The index in this table.
|
214
|
+
# @return [Arrow::Record]
|
215
|
+
# The `Arrow::Record` corresponding to index of
|
216
|
+
# the table.
|
217
|
+
#
|
218
|
+
# @overload slice(booleans)
|
219
|
+
#
|
220
|
+
# @param booleans [::Array<Boolean>]
|
221
|
+
# The values indicating the target rows.
|
222
|
+
# @return [Arrow::Table]
|
223
|
+
# The sub `Arrow::Table` that covers only rows of indices
|
224
|
+
# where the value of `booleans` is true.
|
225
|
+
#
|
226
|
+
# @overload slice(boolean_array)
|
227
|
+
#
|
228
|
+
# @param boolean_array [Arrow::BooleanArray]
|
229
|
+
# The values indicating the target rows.
|
230
|
+
# @return [Arrow::Table]
|
231
|
+
# The sub `Arrow::Table` that covers only rows of indices
|
232
|
+
# where the value of `boolean_array` is true.
|
233
|
+
#
|
234
|
+
# @overload slice(range)
|
235
|
+
#
|
236
|
+
# @param range [Range] The range indicating the target rows.
|
237
|
+
# @return [Arrow::Table]
|
238
|
+
# The sub `Arrow::Table` that covers only rows of the range of indices.
|
239
|
+
#
|
240
|
+
# @overload slice
|
241
|
+
#
|
242
|
+
# @yield [slicer] Gives slicer that constructs condition to select records.
|
243
|
+
# @yieldparam slicer [Arrow::Slicer] The slicer that helps us to
|
244
|
+
# build condition.
|
245
|
+
# @yieldreturn [Arrow::Slicer::Condition, ::Array<Arrow::Slicer::Condition>]
|
246
|
+
# The condition to select records.
|
247
|
+
# @return [Arrow::Table]
|
248
|
+
# The sub `Arrow::Table` that covers only rows matched by condition
|
249
|
+
# specified by slicer.
|
77
250
|
def slice(*args)
|
78
251
|
slicers = []
|
79
|
-
expected_n_args = nil
|
80
|
-
case args.size
|
81
|
-
when 0
|
82
|
-
expected_n_args = "1..2" unless block_given?
|
83
|
-
when 1
|
84
|
-
slicers << args[0]
|
85
|
-
when 2
|
86
|
-
from, to = args
|
87
|
-
slicers << (from...(from + to))
|
88
|
-
else
|
89
|
-
if block_given?
|
90
|
-
expected_n_args = "0..2"
|
91
|
-
else
|
92
|
-
expected_n_args = "1..2"
|
93
|
-
end
|
94
|
-
end
|
95
|
-
if expected_n_args
|
96
|
-
message = "wrong number of arguments " +
|
97
|
-
"(given #{args.size}, expected #{expected_n_args})"
|
98
|
-
raise ArgumentError, message
|
99
|
-
end
|
100
|
-
|
101
252
|
if block_given?
|
253
|
+
unless args.empty?
|
254
|
+
raise ArgumentError, "must not specify both arguments and block"
|
255
|
+
end
|
102
256
|
block_slicer = yield(Slicer.new(self))
|
103
257
|
case block_slicer
|
104
|
-
when nil
|
105
|
-
# Ignore
|
106
258
|
when ::Array
|
107
259
|
slicers.concat(block_slicer)
|
108
260
|
else
|
109
261
|
slicers << block_slicer
|
110
262
|
end
|
263
|
+
else
|
264
|
+
expected_n_args = nil
|
265
|
+
case args.size
|
266
|
+
when 1
|
267
|
+
if args[0].is_a?(Integer)
|
268
|
+
index = args[0]
|
269
|
+
index += n_rows if index < 0
|
270
|
+
return nil if index < 0
|
271
|
+
return nil if index >= n_rows
|
272
|
+
return Record.new(self, index)
|
273
|
+
else
|
274
|
+
slicers << args[0]
|
275
|
+
end
|
276
|
+
when 2
|
277
|
+
offset, length = args
|
278
|
+
slicers << (offset...(offset + length))
|
279
|
+
else
|
280
|
+
expected_n_args = "1..2"
|
281
|
+
end
|
282
|
+
if expected_n_args
|
283
|
+
message = "wrong number of arguments " +
|
284
|
+
"(given #{args.size}, expected #{expected_n_args})"
|
285
|
+
raise ArgumentError, message
|
286
|
+
end
|
111
287
|
end
|
112
288
|
|
113
289
|
ranges = []
|
@@ -116,12 +292,18 @@ module Arrow
|
|
116
292
|
case slicer
|
117
293
|
when Integer
|
118
294
|
slicer += n_rows if slicer < 0
|
119
|
-
ranges << [slicer,
|
295
|
+
ranges << [slicer, n_rows - 1]
|
120
296
|
when Range
|
121
|
-
from = slicer.first
|
297
|
+
original_from = from = slicer.first
|
122
298
|
to = slicer.last
|
123
299
|
to -= 1 if slicer.exclude_end?
|
124
300
|
from += n_rows if from < 0
|
301
|
+
if from < 0 or from >= n_rows
|
302
|
+
message =
|
303
|
+
"offset is out of range (-#{n_rows + 1},#{n_rows}): " +
|
304
|
+
"#{original_from}"
|
305
|
+
raise ArgumentError, message
|
306
|
+
end
|
125
307
|
to += n_rows if to < 0
|
126
308
|
ranges << [from, to]
|
127
309
|
when ::Array
|
@@ -330,47 +512,16 @@ module Arrow
|
|
330
512
|
end
|
331
513
|
end
|
332
514
|
|
333
|
-
# TODO: Almost codes should be implemented in Apache Arrow C++.
|
334
515
|
def slice_by_ranges(ranges)
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
while offset + arrays.first.length - offset_in_array < from
|
344
|
-
offset += arrays.first.length - offset_in_array
|
345
|
-
arrays.shift
|
346
|
-
offset_in_array = 0
|
347
|
-
end
|
348
|
-
if offset < from
|
349
|
-
skipped_size = from - offset
|
350
|
-
offset += skipped_size
|
351
|
-
offset_in_array += skipped_size
|
352
|
-
end
|
353
|
-
array = arrays.first
|
354
|
-
array_length = array.length
|
355
|
-
rest_length = array_length - offset_in_array
|
356
|
-
if rest_length <= range_size
|
357
|
-
chunks << array.slice(offset_in_array, array_length)
|
358
|
-
offset += rest_length
|
359
|
-
range_size -= rest_length
|
360
|
-
offset_in_array = 0
|
361
|
-
arrays.shift
|
362
|
-
else
|
363
|
-
chunks << array.slice(offset_in_array, range_size)
|
364
|
-
offset += range_size
|
365
|
-
offset_in_array += range_size
|
366
|
-
range_size = 0
|
367
|
-
end
|
368
|
-
end
|
369
|
-
end
|
370
|
-
Column.new(column.field, ChunkedArray.new(chunks))
|
516
|
+
sliced_table = []
|
517
|
+
ranges.each do |from, to|
|
518
|
+
sliced_table << slice_raw(from, to - from + 1)
|
519
|
+
end
|
520
|
+
if sliced_table.size > 1
|
521
|
+
sliced_table[0].concatenate(sliced_table[1..-1])
|
522
|
+
else
|
523
|
+
sliced_table[0]
|
371
524
|
end
|
372
|
-
|
373
|
-
self.class.new(schema, sliced_columns)
|
374
525
|
end
|
375
526
|
|
376
527
|
def ensure_column(name, data)
|