red-arrow 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +1 -3
- data/ext/arrow/arrow.cpp +4 -0
- data/ext/arrow/{record-batch.cpp → raw-records.cpp} +49 -5
- data/ext/arrow/red-arrow.hpp +1 -0
- data/lib/arrow/dictionary-data-type.rb +23 -12
- data/lib/arrow/table.rb +228 -77
- data/lib/arrow/version.rb +1 -1
- data/test/raw-records/test-basic-arrays.rb +340 -0
- data/test/raw-records/test-dense-union-array.rb +492 -0
- data/test/raw-records/test-list-array.rb +520 -0
- data/test/raw-records/{record-batch/test-multiple-columns.rb → test-multiple-columns.rb} +34 -18
- data/test/raw-records/test-sparse-union-array.rb +480 -0
- data/test/raw-records/test-struct-array.rb +448 -0
- data/test/raw-records/test-table.rb +47 -0
- data/test/test-dictionary-data-type.rb +3 -3
- data/test/test-table.rb +40 -14
- metadata +59 -57
- data/test/raw-records/record-batch/test-basic-arrays.rb +0 -349
- data/test/raw-records/record-batch/test-dense-union-array.rb +0 -486
- data/test/raw-records/record-batch/test-list-array.rb +0 -498
- data/test/raw-records/record-batch/test-sparse-union-array.rb +0 -474
- data/test/raw-records/record-batch/test-struct-array.rb +0 -426
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97877568cac79133ccac62dcbb51f2e8466c2fb825aaaf58f65976742257fbdc
|
4
|
+
data.tar.gz: 5736802a2d3f5539a2a75421d215aa6bd2a4827ce6da6533cf440c88513db006
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3bf77b7050ee8ad28f6f2bf99186684b33385cbca51bdc39650441875ab7e0701803b05beb34e75cf9a6ab7a6c4b31b6fb053b4b83d14bf6d205018b84e3d246
|
7
|
+
data.tar.gz: 9be3f95a220af905a2c611a6a4742db395441d17d2daf20ea975cec2826bf5dc45319f70fce2536672649a4d3d1a3c499fc3b1f45207764b8500dbecc671613a
|
data/README.md
CHANGED
@@ -33,9 +33,7 @@ gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow
|
|
33
33
|
|
34
34
|
## Install
|
35
35
|
|
36
|
-
Install Apache Arrow GLib before install Red Arrow.
|
37
|
-
|
38
|
-
Note that the Apache Arrow GLib packages are "unofficial". "Official" packages will be released in the future.
|
36
|
+
Install Apache Arrow GLib before install Red Arrow. See [Apache Arrow install document](https://arrow.apache.org/install/) for details.
|
39
37
|
|
40
38
|
Install Red Arrow after you install Apache Arrow GLib:
|
41
39
|
|
data/ext/arrow/arrow.cpp
CHANGED
@@ -34,6 +34,10 @@ extern "C" void Init_arrow() {
|
|
34
34
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
35
35
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
36
36
|
0);
|
37
|
+
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
38
|
+
rb_define_method(cArrowTable, "raw_records",
|
39
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
40
|
+
0);
|
37
41
|
|
38
42
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
39
43
|
|
data/ext/arrow/{record-batch.cpp → raw-records.cpp}
CHANGED
@@ -642,11 +642,34 @@ namespace red_arrow {
|
|
642
642
|
auto record = rb_ary_new_capa(n_columns_);
|
643
643
|
rb_ary_push(records_, record);
|
644
644
|
}
|
645
|
+
row_offset_ = 0;
|
645
646
|
for (int i = 0; i < n_columns_; ++i) {
|
646
647
|
const auto array = record_batch.column(i).get();
|
647
648
|
column_index_ = i;
|
648
649
|
check_status(array->Accept(this),
|
649
|
-
"[raw-records]");
|
650
|
+
"[record-batch][raw-records]");
|
651
|
+
}
|
652
|
+
return Qnil;
|
653
|
+
});
|
654
|
+
}
|
655
|
+
|
656
|
+
void build(const arrow::Table& table) {
|
657
|
+
rb::protect([&] {
|
658
|
+
const auto n_rows = table.num_rows();
|
659
|
+
for (int64_t i = 0; i < n_rows; ++i) {
|
660
|
+
auto record = rb_ary_new_capa(n_columns_);
|
661
|
+
rb_ary_push(records_, record);
|
662
|
+
}
|
663
|
+
for (int i = 0; i < n_columns_; ++i) {
|
664
|
+
const auto column = table.column(i).get();
|
665
|
+
const auto chunked_array = column->data();
|
666
|
+
column_index_ = i;
|
667
|
+
row_offset_ = 0;
|
668
|
+
for (const auto array : chunked_array->chunks()) {
|
669
|
+
check_status(array->Accept(this),
|
670
|
+
"[table][raw-records]");
|
671
|
+
row_offset_ += array->length();
|
672
|
+
}
|
650
673
|
}
|
651
674
|
return Qnil;
|
652
675
|
});
|
@@ -703,17 +726,17 @@ namespace red_arrow {
|
|
703
726
|
void convert(const ArrayType& array) {
|
704
727
|
const auto n = array.length();
|
705
728
|
if (array.null_count() > 0) {
|
706
|
-
for (int64_t i = 0; i < n; ++i) {
|
729
|
+
for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
|
707
730
|
auto value = Qnil;
|
708
731
|
if (!array.IsNull(i)) {
|
709
732
|
value = convert_value(array, i);
|
710
733
|
}
|
711
|
-
auto record = rb_ary_entry(records_,
|
734
|
+
auto record = rb_ary_entry(records_, ii);
|
712
735
|
rb_ary_store(record, column_index_, value);
|
713
736
|
}
|
714
737
|
} else {
|
715
|
-
for (int64_t i = 0; i < n; ++i) {
|
716
|
-
auto record = rb_ary_entry(records_,
|
738
|
+
for (int64_t i = 0, ii = row_offset_; i < n; ++i, ++ii) {
|
739
|
+
auto record = rb_ary_entry(records_, ii);
|
717
740
|
rb_ary_store(record, column_index_, convert_value(array, i));
|
718
741
|
}
|
719
742
|
}
|
@@ -731,6 +754,9 @@ namespace red_arrow {
|
|
731
754
|
// The current column index.
|
732
755
|
int column_index_;
|
733
756
|
|
757
|
+
// The current row offset.
|
758
|
+
int64_t row_offset_;
|
759
|
+
|
734
760
|
// The number of columns.
|
735
761
|
const int n_columns_;
|
736
762
|
};
|
@@ -753,4 +779,22 @@ namespace red_arrow {
|
|
753
779
|
|
754
780
|
return records;
|
755
781
|
}
|
782
|
+
|
783
|
+
VALUE
|
784
|
+
table_raw_records(VALUE rb_table) {
|
785
|
+
auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
|
786
|
+
auto table = garrow_table_get_raw(garrow_table).get();
|
787
|
+
const auto n_rows = table->num_rows();
|
788
|
+
const auto n_columns = table->num_columns();
|
789
|
+
auto records = rb_ary_new_capa(n_rows);
|
790
|
+
|
791
|
+
try {
|
792
|
+
RawRecordsBuilder builder(records, n_columns);
|
793
|
+
builder.build(*table);
|
794
|
+
} catch (rb::State& state) {
|
795
|
+
state.jump();
|
796
|
+
}
|
797
|
+
|
798
|
+
return records;
|
799
|
+
}
|
756
800
|
}
|
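data/ext/arrow/red-arrow.hpp
CHANGED
[The one-line hunk for this file is missing from this capture.]
data/lib/arrow/dictionary-data-type.rb
CHANGED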
@@ -22,7 +22,7 @@ module Arrow
|
|
22
22
|
|
23
23
|
# Creates a new {Arrow::DictionaryDataType}.
|
24
24
|
#
|
25
|
-
# @overload initialize(index_data_type,
|
25
|
+
# @overload initialize(index_data_type, value_data_type, ordered)
|
26
26
|
#
|
27
27
|
# @param index_data_type [Arrow::DataType, Hash, String, Symbol]
|
28
28
|
# The index data type of the dictionary data type. It must be
|
@@ -39,18 +39,23 @@ module Arrow
|
|
39
39
|
# See {Arrow::DataType.resolve} how to specify data type
|
40
40
|
# description.
|
41
41
|
#
|
42
|
-
# @param
|
43
|
-
# dictionary data type.
|
42
|
+
# @param value_data_type [Arrow::DataType, Hash, String, Symbol]
|
43
|
+
# The value data type of the dictionary data type.
|
44
|
+
#
|
45
|
+
# You can specify data type as a description by `Hash`.
|
46
|
+
#
|
47
|
+
# See {Arrow::DataType.resolve} how to specify data type
|
48
|
+
# description.
|
44
49
|
#
|
45
50
|
# @param ordered [Boolean] Whether dictionary contents are
|
46
51
|
# ordered or not.
|
47
52
|
#
|
48
53
|
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
49
54
|
# index_data_type = :int8
|
50
|
-
#
|
55
|
+
# value_data_type = :string
|
51
56
|
# ordered = true
|
52
57
|
# Arrow::DictionaryDataType.new(index_data_type,
|
53
|
-
#
|
58
|
+
# value_data_type,
|
54
59
|
# ordered)
|
55
60
|
#
|
56
61
|
# @overload initialize(description)
|
@@ -74,16 +79,21 @@ module Arrow
|
|
74
79
|
# See {Arrow::DataType.resolve} how to specify data type
|
75
80
|
# description.
|
76
81
|
#
|
77
|
-
# @option description [Arrow::
|
78
|
-
#
|
82
|
+
# @option description [Arrow::DataType, Hash, String, Symbol]
|
83
|
+
# :value_data_type
|
84
|
+
# The value data type of the dictionary data type.
|
85
|
+
#
|
86
|
+
# You can specify data type as a description by `Hash`.
|
87
|
+
#
|
88
|
+
# See {Arrow::DataType.resolve} how to specify data type
|
89
|
+
# description.
|
79
90
|
#
|
80
91
|
# @option description [Boolean] :ordered Whether dictionary
|
81
92
|
# contents are ordered or not.
|
82
93
|
#
|
83
94
|
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
84
|
-
# dictionary = Arrow::StringArray.new(["Hello", "World"])
|
85
95
|
# Arrow::DictionaryDataType.new(index_data_type: :int8,
|
86
|
-
#
|
96
|
+
# value_data_type: :string,
|
87
97
|
# ordered: true)
|
88
98
|
def initialize(*args)
|
89
99
|
n_args = args.size
|
@@ -91,16 +101,17 @@ module Arrow
|
|
91
101
|
when 1
|
92
102
|
description = args[0]
|
93
103
|
index_data_type = description[:index_data_type]
|
94
|
-
|
104
|
+
value_data_type = description[:value_data_type]
|
95
105
|
ordered = description[:ordered]
|
96
106
|
when 3
|
97
|
-
index_data_type,
|
107
|
+
index_data_type, value_data_type, ordered = args
|
98
108
|
else
|
99
109
|
message = "wrong number of arguments (given, #{n_args}, expected 1 or 3)"
|
100
110
|
raise ArgumentError, message
|
101
111
|
end
|
102
112
|
index_data_type = DataType.resolve(index_data_type)
|
103
|
-
|
113
|
+
value_data_type = DataType.resolve(value_data_type)
|
114
|
+
initialize_raw(index_data_type, value_data_type, ordered)
|
104
115
|
end
|
105
116
|
end
|
106
117
|
end
|
data/lib/arrow/table.rb
CHANGED
@@ -30,27 +30,154 @@ module Arrow
|
|
30
30
|
|
31
31
|
alias_method :initialize_raw, :initialize
|
32
32
|
private :initialize_raw
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
33
|
+
|
34
|
+
# Creates a new {Arrow::Table}.
|
35
|
+
#
|
36
|
+
# @overload initialize(columns)
|
37
|
+
#
|
38
|
+
# @param columns [::Array<Arrow::Column>] The columns of the table.
|
39
|
+
#
|
40
|
+
# @example Create a table from columns
|
41
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
42
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
43
|
+
# count_column = Arrow::Column.new(count_field, count_array)
|
44
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
45
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
46
|
+
# visible_column = Arrow::Column.new(visible_field, visible_array)
|
47
|
+
# Arrow::Table.new([count_column, visible_column])
|
48
|
+
#
|
49
|
+
# @overload initialize(raw_table)
|
50
|
+
#
|
51
|
+
# @param raw_table [Hash<String, Arrow::Array>]
|
52
|
+
# The pairs of column name and values of the table. Column values is
|
53
|
+
# `Arrow::Array`.
|
54
|
+
#
|
55
|
+
# @example Create a table from column name and values
|
56
|
+
# Arrow::Table.new("count" => Arrow::UInt32Array.new([0, 2, nil, 4]),
|
57
|
+
# "visible" => Arrow::BooleanArray.new([true, nil, nil, false]))
|
58
|
+
#
|
59
|
+
# @overload initialize(raw_table)
|
60
|
+
#
|
61
|
+
# @param raw_table [Hash<String, Arrow::ChunkedArray>]
|
62
|
+
# The pairs of column name and values of the table. Column values is
|
63
|
+
# `Arrow::ChunkedArray`.
|
64
|
+
#
|
65
|
+
# @example Create a table from column name and values
|
66
|
+
# count_chunks = [
|
67
|
+
# Arrow::UInt32Array.new([0, 2]),
|
68
|
+
# Arrow::UInt32Array.new([nil, 4]),
|
69
|
+
# ]
|
70
|
+
# visible_chunks = [
|
71
|
+
# Arrow::BooleanArray.new([true]),
|
72
|
+
# Arrow::BooleanArray.new([nil, nil, false]),
|
73
|
+
# ]
|
74
|
+
# Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
|
75
|
+
# "visible" => Arrow::ChunkedArray.new(visible_chunks))
|
76
|
+
#
|
77
|
+
# @overload initialize(schema, columns)
|
78
|
+
#
|
79
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
80
|
+
# You can also specify schema as primitive Ruby objects.
|
81
|
+
# See {Arrow::Schema#initialize} for details.
|
82
|
+
#
|
83
|
+
# @param columns [::Array<Arrow::Column>] The data of the table.
|
84
|
+
#
|
85
|
+
# @example Create a table from schema and columns
|
86
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
87
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
88
|
+
# count_column = Arrow::Column.new(count_field, count_array)
|
89
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
90
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
91
|
+
# visible_column = Arrow::Column.new(visible_field, visible_array)
|
92
|
+
# Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
|
93
|
+
# [count_column, visible_column])
|
94
|
+
#
|
95
|
+
# @overload initialize(schema, arrays)
|
96
|
+
#
|
97
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
98
|
+
# You can also specify schema as primitive Ruby objects.
|
99
|
+
# See {Arrow::Schema#initialize} for details.
|
100
|
+
#
|
101
|
+
# @param arrays [::Array<Arrow::Array>] The data of the table.
|
102
|
+
#
|
103
|
+
# @example Create a table from schema and arrays
|
104
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
105
|
+
# count_array = Arrow::UInt32Array.new([0, 2, nil, 4])
|
106
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
107
|
+
# visible_array = Arrow::BooleanArray.new([true, nil, nil, false])
|
108
|
+
# Arrow::Table.new(Arrow::Schema.new([count_field, visible_field]),
|
109
|
+
# [count_array, visible_array])
|
110
|
+
#
|
111
|
+
# @overload initialize(schema, record_batches)
|
112
|
+
#
|
113
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
114
|
+
# You can also specify schema as primitive Ruby objects.
|
115
|
+
# See {Arrow::Schema#initialize} for details.
|
116
|
+
#
|
117
|
+
# @param arrays [::Array<Arrow::RecordBatch>] The data of the table.
|
118
|
+
#
|
119
|
+
# @example Create a table from schema and record batches
|
120
|
+
# count_field = Arrow::Field.new("count", :uint32)
|
121
|
+
# visible_field = Arrow::Field.new("visible", :boolean)
|
122
|
+
# schema = Arrow::Schema.new([count_field, visible_field])
|
123
|
+
# record_batches = [
|
124
|
+
# Arrow::RecordBatch.new(schema, [[0, true], [2, nil], [nil, nil]]),
|
125
|
+
# Arrow::RecordBatch.new(schema, [[4, false]]),
|
126
|
+
# ]
|
127
|
+
# Arrow::Table.new(schema, record_batches)
|
128
|
+
#
|
129
|
+
# @overload initialize(schema, raw_records)
|
130
|
+
#
|
131
|
+
# @param schema [Arrow::Schema] The schema of the table.
|
132
|
+
# You can also specify schema as primitive Ruby objects.
|
133
|
+
# See {Arrow::Schema#initialize} for details.
|
134
|
+
#
|
135
|
+
# @param arrays [::Array<::Array>] The data of the table as primitive
|
136
|
+
# Ruby objects.
|
137
|
+
#
|
138
|
+
# @example Create a table from schema and raw records
|
139
|
+
# schema = {
|
140
|
+
# count: :uint32,
|
141
|
+
# visible: :boolean,
|
142
|
+
# }
|
143
|
+
# raw_records = [
|
144
|
+
# [0, true],
|
145
|
+
# [2, nil],
|
146
|
+
# [nil, nil],
|
147
|
+
# [4, false],
|
148
|
+
# ]
|
149
|
+
# Arrow::Table.new(schema, raw_records)
|
150
|
+
def initialize(*args)
|
151
|
+
n_args = args.size
|
152
|
+
case n_args
|
153
|
+
when 1
|
154
|
+
if args[0][0].is_a?(Column)
|
155
|
+
values = args[0]
|
156
|
+
fields = values.collect(&:field)
|
38
157
|
schema = Schema.new(fields)
|
39
158
|
else
|
40
|
-
raw_table =
|
159
|
+
raw_table = args[0]
|
41
160
|
fields = []
|
42
|
-
|
161
|
+
values = []
|
43
162
|
raw_table.each do |name, array|
|
44
163
|
field = Field.new(name.to_s, array.value_data_type)
|
45
164
|
fields << field
|
46
|
-
|
165
|
+
values << Column.new(field, array)
|
47
166
|
end
|
48
167
|
schema = Schema.new(fields)
|
49
168
|
end
|
169
|
+
when 2
|
170
|
+
schema = args[0]
|
171
|
+
schema = Schema.new(schema) unless schema.is_a?(Schema)
|
172
|
+
values = args[1]
|
173
|
+
if values[0].is_a?(::Array)
|
174
|
+
values = [RecordBatch.new(schema, values)]
|
175
|
+
end
|
50
176
|
else
|
51
|
-
|
177
|
+
message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
|
178
|
+
raise ArgumentError, message
|
52
179
|
end
|
53
|
-
initialize_raw(schema,
|
180
|
+
initialize_raw(schema, values)
|
54
181
|
end
|
55
182
|
|
56
183
|
def columns
|
@@ -71,43 +198,92 @@ module Arrow
|
|
71
198
|
|
72
199
|
alias_method :[], :find_column
|
73
200
|
|
74
|
-
|
201
|
+
alias_method :slice_raw, :slice
|
202
|
+
|
203
|
+
# @overload slice(offset, length)
|
75
204
|
#
|
76
|
-
#
|
205
|
+
# @param offset [Integer] The offset of sub Arrow::Table.
|
206
|
+
# @param length [Integer] The length of sub Arrow::Table.
|
207
|
+
# @return [Arrow::Table]
|
208
|
+
# The sub `Arrow::Table` that covers only from
|
209
|
+
# `offset` to `offset + length` range.
|
210
|
+
#
|
211
|
+
# @overload slice(index)
|
212
|
+
#
|
213
|
+
# @param index [Integer] The index in this table.
|
214
|
+
# @return [Arrow::Record]
|
215
|
+
# The `Arrow::Record` corresponding to index of
|
216
|
+
# the table.
|
217
|
+
#
|
218
|
+
# @overload slice(booleans)
|
219
|
+
#
|
220
|
+
# @param booleans [::Array<Boolean>]
|
221
|
+
# The values indicating the target rows.
|
222
|
+
# @return [Arrow::Table]
|
223
|
+
# The sub `Arrow::Table` that covers only rows of indices
|
224
|
+
# the values of `booleans` is true.
|
225
|
+
#
|
226
|
+
# @overload slice(boolean_array)
|
227
|
+
#
|
228
|
+
# @param boolean_array [::Array<Arrow::BooleanArray>]
|
229
|
+
# The values indicating the target rows.
|
230
|
+
# @return [Arrow::Table]
|
231
|
+
# The sub `Arrow::Table` that covers only rows of indices
|
232
|
+
# the values of `boolean_array` is true.
|
233
|
+
#
|
234
|
+
# @overload slice(range)
|
235
|
+
#
|
236
|
+
# @param range_included_end [Range] The range indicating the target rows.
|
237
|
+
# @return [Arrow::Table]
|
238
|
+
# The sub `Arrow::Table` that covers only rows of the range of indices.
|
239
|
+
#
|
240
|
+
# @overload slice
|
241
|
+
#
|
242
|
+
# @yield [slicer] Gives slicer that constructs condition to select records.
|
243
|
+
# @yieldparam slicer [Arrow::Slicer] The slicer that helps us to
|
244
|
+
# build condition.
|
245
|
+
# @yieldreturn [Arrow::Slicer::Condition, ::Array<Arrow::Slicer::Condition>]
|
246
|
+
# The condition to select records.
|
247
|
+
# @return [Arrow::Table]
|
248
|
+
# The sub `Arrow::Table` that covers only rows matched by condition
|
249
|
+
# specified by slicer.
|
77
250
|
def slice(*args)
|
78
251
|
slicers = []
|
79
|
-
expected_n_args = nil
|
80
|
-
case args.size
|
81
|
-
when 0
|
82
|
-
expected_n_args = "1..2" unless block_given?
|
83
|
-
when 1
|
84
|
-
slicers << args[0]
|
85
|
-
when 2
|
86
|
-
from, to = args
|
87
|
-
slicers << (from...(from + to))
|
88
|
-
else
|
89
|
-
if block_given?
|
90
|
-
expected_n_args = "0..2"
|
91
|
-
else
|
92
|
-
expected_n_args = "1..2"
|
93
|
-
end
|
94
|
-
end
|
95
|
-
if expected_n_args
|
96
|
-
message = "wrong number of arguments " +
|
97
|
-
"(given #{args.size}, expected #{expected_n_args})"
|
98
|
-
raise ArgumentError, message
|
99
|
-
end
|
100
|
-
|
101
252
|
if block_given?
|
253
|
+
unless args.empty?
|
254
|
+
raise ArgumentError, "must not specify both arguments and block"
|
255
|
+
end
|
102
256
|
block_slicer = yield(Slicer.new(self))
|
103
257
|
case block_slicer
|
104
|
-
when nil
|
105
|
-
# Ignore
|
106
258
|
when ::Array
|
107
259
|
slicers.concat(block_slicer)
|
108
260
|
else
|
109
261
|
slicers << block_slicer
|
110
262
|
end
|
263
|
+
else
|
264
|
+
expected_n_args = nil
|
265
|
+
case args.size
|
266
|
+
when 1
|
267
|
+
if args[0].is_a?(Integer)
|
268
|
+
index = args[0]
|
269
|
+
index += n_rows if index < 0
|
270
|
+
return nil if index < 0
|
271
|
+
return nil if index >= n_rows
|
272
|
+
return Record.new(self, index)
|
273
|
+
else
|
274
|
+
slicers << args[0]
|
275
|
+
end
|
276
|
+
when 2
|
277
|
+
offset, length = args
|
278
|
+
slicers << (offset...(offset + length))
|
279
|
+
else
|
280
|
+
expected_n_args = "1..2"
|
281
|
+
end
|
282
|
+
if expected_n_args
|
283
|
+
message = "wrong number of arguments " +
|
284
|
+
"(given #{args.size}, expected #{expected_n_args})"
|
285
|
+
raise ArgumentError, message
|
286
|
+
end
|
111
287
|
end
|
112
288
|
|
113
289
|
ranges = []
|
@@ -116,12 +292,18 @@ module Arrow
|
|
116
292
|
case slicer
|
117
293
|
when Integer
|
118
294
|
slicer += n_rows if slicer < 0
|
119
|
-
ranges << [slicer,
|
295
|
+
ranges << [slicer, n_rows - 1]
|
120
296
|
when Range
|
121
|
-
from = slicer.first
|
297
|
+
original_from = from = slicer.first
|
122
298
|
to = slicer.last
|
123
299
|
to -= 1 if slicer.exclude_end?
|
124
300
|
from += n_rows if from < 0
|
301
|
+
if from < 0 or from >= n_rows
|
302
|
+
message =
|
303
|
+
"offset is out of range (-#{n_rows + 1},#{n_rows}): " +
|
304
|
+
"#{original_from}"
|
305
|
+
raise ArgumentError, message
|
306
|
+
end
|
125
307
|
to += n_rows if to < 0
|
126
308
|
ranges << [from, to]
|
127
309
|
when ::Array
|
@@ -330,47 +512,16 @@ module Arrow
|
|
330
512
|
end
|
331
513
|
end
|
332
514
|
|
333
|
-
# TODO: Almost codes should be implemented in Apache Arrow C++.
|
334
515
|
def slice_by_ranges(ranges)
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
while offset + arrays.first.length - offset_in_array < from
|
344
|
-
offset += arrays.first.length - offset_in_array
|
345
|
-
arrays.shift
|
346
|
-
offset_in_array = 0
|
347
|
-
end
|
348
|
-
if offset < from
|
349
|
-
skipped_size = from - offset
|
350
|
-
offset += skipped_size
|
351
|
-
offset_in_array += skipped_size
|
352
|
-
end
|
353
|
-
array = arrays.first
|
354
|
-
array_length = array.length
|
355
|
-
rest_length = array_length - offset_in_array
|
356
|
-
if rest_length <= range_size
|
357
|
-
chunks << array.slice(offset_in_array, array_length)
|
358
|
-
offset += rest_length
|
359
|
-
range_size -= rest_length
|
360
|
-
offset_in_array = 0
|
361
|
-
arrays.shift
|
362
|
-
else
|
363
|
-
chunks << array.slice(offset_in_array, range_size)
|
364
|
-
offset += range_size
|
365
|
-
offset_in_array += range_size
|
366
|
-
range_size = 0
|
367
|
-
end
|
368
|
-
end
|
369
|
-
end
|
370
|
-
Column.new(column.field, ChunkedArray.new(chunks))
|
516
|
+
sliced_table = []
|
517
|
+
ranges.each do |from, to|
|
518
|
+
sliced_table << slice_raw(from, to - from + 1)
|
519
|
+
end
|
520
|
+
if sliced_table.size > 1
|
521
|
+
sliced_table[0].concatenate(sliced_table[1..-1])
|
522
|
+
else
|
523
|
+
sliced_table[0]
|
371
524
|
end
|
372
|
-
|
373
|
-
self.class.new(schema, sliced_columns)
|
374
525
|
end
|
375
526
|
|
376
527
|
def ensure_column(name, data)
|