red-arrow 12.0.1 → 14.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d5ae05a0b4404f01e8d25b3678e9ea60dc74818def853dcb7c9d3dabd90da57
4
- data.tar.gz: bbbf89a3ad19f9179125ac6745310057bc005375d234e52269a6515e49bf62f7
3
+ metadata.gz: d76026332781cece25d5056c7b5eff1d650bfb8b794678ee16fe5e733412d39d
4
+ data.tar.gz: de610617cb966bca7b13b4e8e5d012f1d4ccb9c2e61e1e5db89a7af6f5d46cd4
5
5
  SHA512:
6
- metadata.gz: 70f7827329c39b553e8d3183e480f7ad2d2eb6c30857cb2454c6de48835ef73eae80673cab1fe824db0d79040a057ab6f56eb044d408a0bec414480678a4a3de
7
- data.tar.gz: 36070c98687420931d9d16881f3d011da043f07c0e88b0f973d641bc73449a59b13682ef1e817b74fadd5394cd5950b325bd5a91324a3043e93c2fcaaffb676d
6
+ metadata.gz: 13e0adf265f41336b40cea4e3d96be8a142d920c0a6a15bc0ce343d80408ca7c4220edbe8a4a91a2973fed4ea8229820cd762166e5a9ae4d0418ed3eb1c164c1
7
+ data.tar.gz: 9cf84a3bd5527a7d19dfaaf3aa3c922543ec6cfccdedb90bd721d68bec1fb4b9851df20e003df302a663bde921c48d15debf2c37dd0e82bde669382e6cf4343d
data/ext/arrow/arrow.cpp CHANGED
@@ -43,6 +43,26 @@ namespace red_arrow {
43
43
  VALUE month;
44
44
  VALUE nanosecond;
45
45
  }
46
+
47
+ void
48
+ record_batch_reader_mark(gpointer object)
49
+ {
50
+ auto reader = GARROW_RECORD_BATCH_READER(object);
51
+ auto sources = garrow_record_batch_reader_get_sources(reader);
52
+ for (auto source = sources; sources; sources = g_list_next(sources)) {
53
+ rbgobj_gc_mark_instance(source->data);
54
+ }
55
+ }
56
+
57
+ void
58
+ execute_plan_mark(gpointer object)
59
+ {
60
+ auto plan = GARROW_EXECUTE_PLAN(object);
61
+ auto nodes = garrow_execute_plan_get_nodes(plan);
62
+ for (auto node = nodes; nodes; nodes = g_list_next(nodes)) {
63
+ rbgobj_gc_mark_instance(node->data);
64
+ }
65
+ }
46
66
  }
47
67
 
48
68
  extern "C" void Init_arrow() {
@@ -62,11 +82,17 @@ extern "C" void Init_arrow() {
62
82
  rb_define_method(cArrowRecordBatch, "raw_records",
63
83
  reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
64
84
  0);
85
+ rb_define_method(cArrowRecordBatch, "each_raw_record",
86
+ reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record),
87
+ 0);
65
88
 
66
89
  auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
67
90
  rb_define_method(cArrowTable, "raw_records",
68
91
  reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
69
92
  0);
93
+ rb_define_method(cArrowTable, "each_raw_record",
94
+ reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record),
95
+ 0);
70
96
 
71
97
  red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
72
98
 
@@ -93,4 +119,9 @@ extern "C" void Init_arrow() {
93
119
  red_arrow::symbols::millisecond = ID2SYM(rb_intern("millisecond"));
94
120
  red_arrow::symbols::month = ID2SYM(rb_intern("month"));
95
121
  red_arrow::symbols::nanosecond = ID2SYM(rb_intern("nanosecond"));
122
+
123
+ rbgobj_register_mark_func(GARROW_TYPE_RECORD_BATCH_READER,
124
+ red_arrow::record_batch_reader_mark);
125
+ rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN,
126
+ red_arrow::execute_plan_mark);
96
127
  }
data/ext/arrow/extconf.rb CHANGED
@@ -41,9 +41,8 @@ end
41
41
  unless required_pkg_config_package([
42
42
  "arrow",
43
43
  Arrow::Version::MAJOR,
44
- Arrow::Version::MINOR,
45
- Arrow::Version::MICRO,
46
44
  ],
45
+ conda: "libarrow",
47
46
  debian: "libarrow-dev",
48
47
  fedora: "libarrow-devel",
49
48
  homebrew: "apache-arrow",
@@ -58,6 +57,7 @@ unless required_pkg_config_package([
58
57
  Arrow::Version::MINOR,
59
58
  Arrow::Version::MICRO,
60
59
  ],
60
+ conda: "arrow-c-glib",
61
61
  debian: "libarrow-glib-dev",
62
62
  fedora: "libarrow-glib-devel",
63
63
  homebrew: "apache-arrow-glib",
@@ -144,6 +144,128 @@ namespace red_arrow {
144
144
  // The number of columns.
145
145
  const int n_columns_;
146
146
  };
147
+
148
+ class RawRecordsProducer : private Converter, public arrow::ArrayVisitor {
149
+ public:
150
+ explicit RawRecordsProducer()
151
+ : Converter(),
152
+ record_(Qnil),
153
+ column_index_(0),
154
+ row_offset_(0) {
155
+ }
156
+
157
+ void produce(const arrow::RecordBatch& record_batch) {
158
+ rb::protect([&] {
159
+ const auto n_columns = record_batch.num_columns();
160
+ const auto n_rows = record_batch.num_rows();
161
+ for (int64_t i = 0; i < n_rows; ++i) {
162
+ record_ = rb_ary_new_capa(n_columns);
163
+ row_offset_ = i;
164
+ for (int i = 0; i < n_columns; ++i) {
165
+ const auto array = record_batch.column(i).get();
166
+ column_index_ = i;
167
+ check_status(array->Accept(this),
168
+ "[record-batch][each-raw-record]");
169
+ }
170
+ rb_yield(record_);
171
+ }
172
+ return Qnil;
173
+ });
174
+ }
175
+
176
+ void produce(const arrow::Table& table) {
177
+ rb::protect([&] {
178
+ const auto n_columns = table.num_columns();
179
+ const auto n_rows = table.num_rows();
180
+ std::vector<int> chunk_indexes(n_columns);
181
+ std::vector<int64_t> row_offsets(n_columns);
182
+ for (int64_t i_row = 0; i_row < n_rows; ++i_row) {
183
+ record_ = rb_ary_new_capa(n_columns);
184
+ for (int i_column = 0; i_column < n_columns; ++i_column) {
185
+ column_index_ = i_column;
186
+ const auto chunked_array = table.column(i_column).get();
187
+ auto& chunk_index = chunk_indexes[i_column];
188
+ auto& row_offset = row_offsets[i_column];
189
+ auto array = chunked_array->chunk(chunk_index).get();
190
+ while (array->length() == row_offset) {
191
+ ++chunk_index;
192
+ row_offset = 0;
193
+ array = chunked_array->chunk(chunk_index).get();
194
+ }
195
+ row_offset_ = row_offset;
196
+ check_status(array->Accept(this),
197
+ "[table][each-raw-record]");
198
+ ++row_offset;
199
+ }
200
+ rb_yield(record_);
201
+ }
202
+
203
+ return Qnil;
204
+ });
205
+ }
206
+
207
+ #define VISIT(TYPE) \
208
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
209
+ convert(array); \
210
+ return arrow::Status::OK(); \
211
+ }
212
+
213
+ VISIT(Null)
214
+ VISIT(Boolean)
215
+ VISIT(Int8)
216
+ VISIT(Int16)
217
+ VISIT(Int32)
218
+ VISIT(Int64)
219
+ VISIT(UInt8)
220
+ VISIT(UInt16)
221
+ VISIT(UInt32)
222
+ VISIT(UInt64)
223
+ VISIT(HalfFloat)
224
+ VISIT(Float)
225
+ VISIT(Double)
226
+ VISIT(Binary)
227
+ VISIT(String)
228
+ VISIT(FixedSizeBinary)
229
+ VISIT(Date32)
230
+ VISIT(Date64)
231
+ VISIT(Time32)
232
+ VISIT(Time64)
233
+ VISIT(Timestamp)
234
+ VISIT(MonthInterval)
235
+ VISIT(DayTimeInterval)
236
+ VISIT(MonthDayNanoInterval)
237
+ VISIT(List)
238
+ VISIT(Struct)
239
+ VISIT(Map)
240
+ VISIT(SparseUnion)
241
+ VISIT(DenseUnion)
242
+ VISIT(Dictionary)
243
+ VISIT(Decimal128)
244
+ VISIT(Decimal256)
245
+ // TODO
246
+ // VISIT(Extension)
247
+
248
+ #undef VISIT
249
+
250
+ private:
251
+ template <typename ArrayType>
252
+ void convert(const ArrayType& array) {
253
+ auto value = Qnil;
254
+ if (!array.IsNull(row_offset_)) {
255
+ value = convert_value(array, row_offset_);
256
+ }
257
+ rb_ary_store(record_, column_index_, value);
258
+ }
259
+
260
+ // Destination for converted record.
261
+ VALUE record_;
262
+
263
+ // The current column index.
264
+ int column_index_;
265
+
266
+ // The current row offset.
267
+ int64_t row_offset_;
268
+ };
147
269
  }
148
270
 
149
271
  VALUE
@@ -181,4 +303,36 @@ namespace red_arrow {
181
303
 
182
304
  return records;
183
305
  }
306
+
307
+ VALUE
308
+ record_batch_each_raw_record(VALUE rb_record_batch) {
309
+ auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
310
+ auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();
311
+ RETURN_SIZED_ENUMERATOR(rb_record_batch, 0, nullptr, record_batch->num_rows());
312
+
313
+ try {
314
+ RawRecordsProducer producer;
315
+ producer.produce(*record_batch);
316
+ } catch (rb::State& state) {
317
+ state.jump();
318
+ }
319
+
320
+ return Qnil;
321
+ }
322
+
323
+ VALUE
324
+ table_each_raw_record(VALUE rb_table) {
325
+ auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
326
+ auto table = garrow_table_get_raw(garrow_table).get();
327
+ RETURN_SIZED_ENUMERATOR(rb_table, 0, nullptr, table->num_rows());
328
+
329
+ try {
330
+ RawRecordsProducer producer;
331
+ producer.produce(*table);
332
+ } catch (rb::State& state) {
333
+ state.jump();
334
+ }
335
+
336
+ return Qnil;
337
+ }
184
338
  }
@@ -59,6 +59,8 @@ namespace red_arrow {
59
59
 
60
60
  VALUE record_batch_raw_records(VALUE obj);
61
61
  VALUE table_raw_records(VALUE obj);
62
+ VALUE record_batch_each_raw_record(VALUE obj);
63
+ VALUE table_each_raw_record(VALUE obj);
62
64
 
63
65
  inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
64
66
  switch (unit) {
@@ -29,7 +29,11 @@ module Arrow
29
29
  end
30
30
 
31
31
  def to_arrow_array
32
- combine
32
+ if n_chunks.zero?
33
+ value_data_type.build_array([])
34
+ else
35
+ combine
36
+ end
33
37
  end
34
38
 
35
39
  def to_arrow_chunked_array
@@ -31,10 +31,14 @@ module Arrow
31
31
  else
32
32
  return nil
33
33
  end
34
+ options = nil
34
35
  if arguments.last.is_a?(FunctionOptions)
35
36
  options = arguments.pop
36
- else
37
- options = nil
37
+ elsif arguments.last.is_a?(Hash)
38
+ function = Function.find(function_name)
39
+ if function
40
+ options = function.resolve_options(arguments.pop)
41
+ end
38
42
  end
39
43
  CallExpression.new(function_name, arguments, options)
40
44
  else
@@ -24,7 +24,6 @@ module Arrow
24
24
  end
25
25
  alias_method :call, :execute
26
26
 
27
- private
28
27
  def resolve_options(options)
29
28
  return nil if options.nil?
30
29
  return options if options.is_a?(FunctionOptions)
data/lib/arrow/slicer.rb CHANGED
@@ -189,7 +189,7 @@ module Arrow
189
189
  message =
190
190
  "pattern must be either String or Regexp: #{pattern.inspect}"
191
191
  raise ArgumentError, message
192
- end
192
+ end
193
193
  end
194
194
 
195
195
  def start_with?(substring, ignore_case: false)
data/lib/arrow/table.rb CHANGED
@@ -127,7 +127,7 @@ module Arrow
127
127
  # You can also specify schema as primitive Ruby objects.
128
128
  # See {Arrow::Schema#initialize} for details.
129
129
  #
130
- # @param arrays [::Array<Arrow::RecordBatch>] The data of the table.
130
+ # @param record_batches [::Array<Arrow::RecordBatch>] The data of the table.
131
131
  #
132
132
  # @example Create a table from schema and record batches
133
133
  # count_field = Arrow::Field.new("count", :uint32)
@@ -145,7 +145,7 @@ module Arrow
145
145
  # You can also specify schema as primitive Ruby objects.
146
146
  # See {Arrow::Schema#initialize} for details.
147
147
  #
148
- # @param arrays [::Array<::Array>] The data of the table as primitive
148
+ # @param raw_records [::Array<::Array>] The data of the table as primitive
149
149
  # Ruby objects.
150
150
  #
151
151
  # @example Create a table from schema and raw records
data/lib/arrow/version.rb CHANGED
@@ -16,7 +16,7 @@
16
16
  # under the License.
17
17
 
18
18
  module Arrow
19
- VERSION = "12.0.1"
19
+ VERSION = "14.0.0"
20
20
 
21
21
  module Version
22
22
  numbers, TAG = VERSION.split("-")
data/red-arrow.gemspec CHANGED
@@ -47,6 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.extensions = ["ext/arrow/extconf.rb"]
48
48
 
49
49
  spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
50
+ spec.add_runtime_dependency("csv")
50
51
  spec.add_runtime_dependency("extpp", ">= 0.1.1")
51
52
  spec.add_runtime_dependency("gio2", ">= 3.5.0")
52
53
  spec.add_runtime_dependency("native-package-installer")