duckdb 1.5.3.0 → 1.5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +52 -0
- data/ext/duckdb/aggregate_function.c +0 -1
- data/ext/duckdb/appender.c +17 -0
- data/ext/duckdb/arrow_array_stream.c +226 -0
- data/ext/duckdb/arrow_array_stream.h +61 -0
- data/ext/duckdb/arrow_import.c +165 -0
- data/ext/duckdb/arrow_import.h +6 -0
- data/ext/duckdb/blob.c +1 -1
- data/ext/duckdb/blob.h +1 -2
- data/ext/duckdb/config.c +1 -1
- data/ext/duckdb/config.h +1 -1
- data/ext/duckdb/connection.c +3 -3
- data/ext/duckdb/converter.h +1 -0
- data/ext/duckdb/conveter.c +39 -9
- data/ext/duckdb/data_chunk.c +10 -0
- data/ext/duckdb/data_chunk.h +1 -0
- data/ext/duckdb/duckdb.c +13 -11
- data/ext/duckdb/error.c +1 -1
- data/ext/duckdb/error.h +1 -3
- data/ext/duckdb/function_executor.c +308 -2
- data/ext/duckdb/function_executor.h +44 -0
- data/ext/duckdb/prepared_statement.c +21 -0
- data/ext/duckdb/result.c +49 -3
- data/ext/duckdb/result.h +11 -0
- data/ext/duckdb/ruby-duckdb.h +3 -0
- data/ext/duckdb/scalar_function.c +97 -29
- data/ext/duckdb/scalar_function.h +2 -4
- data/ext/duckdb/scalar_function_bind_info.c +13 -13
- data/ext/duckdb/scalar_function_bind_info.h +1 -1
- data/ext/duckdb/scalar_function_set.c +9 -9
- data/ext/duckdb/scalar_function_set.h +2 -2
- data/ext/duckdb/table_description.c +19 -19
- data/ext/duckdb/table_description.h +1 -1
- data/ext/duckdb/table_function.c +94 -28
- data/ext/duckdb/table_function.h +2 -2
- data/ext/duckdb/table_function_bind_info.c +20 -20
- data/ext/duckdb/table_function_bind_info.h +2 -2
- data/ext/duckdb/table_function_function_info.c +5 -5
- data/ext/duckdb/table_function_function_info.h +2 -2
- data/ext/duckdb/table_function_init_info.c +70 -5
- data/ext/duckdb/table_function_init_info.h +2 -2
- data/lib/duckdb/appender.rb +23 -0
- data/lib/duckdb/arrow_array_stream.rb +33 -0
- data/lib/duckdb/connection.rb +54 -0
- data/lib/duckdb/prepared_statement.rb +17 -0
- data/lib/duckdb/version.rb +1 -1
- data/lib/duckdb.rb +1 -0
- metadata +6 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c9c37c59f2e64ac10b72765250ec674ef40e82d83a7499a6c9e1117ee8e9e9d0
|
|
4
|
+
data.tar.gz: e9510873a0de444b35bb7c3de2d26f8d9a9ebcd01466a95383fa16d38d277e97
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2ab204dc5d6034b846d3267ca78fd4d47f75f000e3d6c97e5673ca9791618c84e7130230cd1ad7634858ff25fd5cc5952bac500263ef228225767a730684749c
|
|
7
|
+
data.tar.gz: f55e20a3dc6a97aea0606dca4b047746da44941c4f84145a48c45248d150fd58ef800ca617adc7921ae6418ac0f8275bf29e24405f3cdbf2e917377029b8641f
|
data/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
# Unreleased
|
|
6
6
|
|
|
7
|
+
# 1.5.4.0 - 2026-06-20
|
|
8
|
+
- bump up DuckDB 1.5.4 and 1.4.5 on CI.
|
|
9
|
+
- add experimental `DuckDB::Result#arrow_c_stream` returning `DuckDB::ArrowArrayStream` to export a query result as an Arrow C stream (Arrow C Data Interface). The stream can be consumed directly by ruby-polars (`Polars::DataFrame.new(result)`) and red-arrow (`Arrow::RecordBatchReader.import(stream.to_i)`).
|
|
10
|
+
- add experimental `DuckDB::Connection#append_arrow(table, producer)` to import an Arrow producer (any object responding to `#arrow_c_stream`, such as a Polars `DataFrame` or a `DuckDB::Result`) into an existing table, returning the number of rows appended.
|
|
11
|
+
- drop Ruby 3.2.
|
|
12
|
+
- add `DuckDB::TableFunction::InitInfo#max_threads=` (and `#set_max_threads`) to hint DuckDB how many worker threads can execute a custom table function concurrently.
|
|
13
|
+
- add `DuckDB::TableFunction::InitInfo#column_count` to get the number of projected result columns of a custom table function scan.
|
|
14
|
+
- add `DuckDB::TableFunction::InitInfo#column_index` to get the source column index of a given projected result column of a custom table function scan.
|
|
15
|
+
- add `DuckDB::Appender#append_uuid(value)` to append a UUID value to a `UUID` column. `value` must be a String in canonical UUID format (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`).
|
|
16
|
+
- add `DuckDB::PreparedStatement#bind_uuid(index, value)` to bind a UUID parameter. `value` must be a String in canonical UUID format.
|
|
17
|
+
|
|
7
18
|
# 1.5.3.0 - 2026-05-24
|
|
8
19
|
- bump up DuckDB 1.5.3 on CI.
|
|
9
20
|
- add `DuckDB::AggregateFunctionSet` class and `DuckDB::Connection#register_aggregate_function_set` to register multiple overloads of a custom aggregate function under one SQL name.
|
data/README.md
CHANGED
|
@@ -266,6 +266,58 @@ res.first.first # => 4
|
|
|
266
266
|
|
|
267
267
|
Set `DuckDB.default_timezone` to control how TIMESTAMP and TIME values without time zone are converted to Ruby `Time` objects. The default is `:local`, but you can use `:utc` for UTC conversion.
|
|
268
268
|
|
|
269
|
+
### Arrow interop (experimental)
|
|
270
|
+
|
|
271
|
+
`DuckDB::Result#arrow_c_stream` exports a query result as an Arrow C stream
|
|
272
|
+
([Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)).
|
|
273
|
+
The returned `DuckDB::ArrowArrayStream` satisfies the Ruby Arrow C stream
|
|
274
|
+
protocol (`#arrow_c_stream` / `#to_i`), so query results can be handed to
|
|
275
|
+
Arrow consumers in columnar form, without converting each row to Ruby objects.
|
|
276
|
+
|
|
277
|
+
With [ruby-polars](https://github.com/ankane/ruby-polars), no glue code is needed:
|
|
278
|
+
|
|
279
|
+
```ruby
|
|
280
|
+
result = con.query('SELECT * FROM users')
|
|
281
|
+
df = Polars::DataFrame.new(result)
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
With [red-arrow](https://github.com/apache/arrow/tree/main/ruby/red-arrow),
|
|
285
|
+
pass the stream address to `Arrow::RecordBatchReader.import`:
|
|
286
|
+
|
|
287
|
+
```ruby
|
|
288
|
+
result = con.query('SELECT * FROM users')
|
|
289
|
+
reader = Arrow::RecordBatchReader.import(result.arrow_c_stream.to_i)
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
The consumer takes ownership of the stream's contents, so a result can be
|
|
293
|
+
exported only once; exporting the same result again raises `DuckDB::Error`.
|
|
294
|
+
|
|
295
|
+
In the other direction, `DuckDB::Connection#append_arrow` imports an Arrow
|
|
296
|
+
producer into an existing table. Any object responding to `#arrow_c_stream`
|
|
297
|
+
works as the producer — for example a Polars `DataFrame`, or another
|
|
298
|
+
`DuckDB::Result`:
|
|
299
|
+
|
|
300
|
+
```ruby
|
|
301
|
+
con.query('CREATE TABLE users (id BIGINT, name VARCHAR)')
|
|
302
|
+
rows = con.append_arrow('users', polars_df) # => number of rows appended
|
|
303
|
+
con.query('SELECT * FROM users').to_a
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
The producer's columns must line up with the table's columns by count and
|
|
307
|
+
position. DuckDB casts compatible column types (e.g. INTEGER into a BIGINT
|
|
308
|
+
column); a type that cannot be cast raises `DuckDB::Error`. `append_arrow` is
|
|
309
|
+
not transactional — wrap it in your own transaction if you need all-or-nothing.
|
|
310
|
+
|
|
311
|
+
These features are **experimental**: they are built on DuckDB's unstable Arrow
|
|
312
|
+
C API and may change in any minor release.
|
|
313
|
+
|
|
314
|
+
Note: [red-arrow-format](https://github.com/apache/arrow/tree/main/ruby/red-arrow-format)
|
|
315
|
+
(the pure-Ruby Arrow implementation) supports only the Arrow IPC
|
|
316
|
+
serialization format, not the C Data Interface. To exchange data with it,
|
|
317
|
+
write/read Arrow IPC files through DuckDB's
|
|
318
|
+
[arrow community extension](https://duckdb.org/community_extensions/extensions/arrow.html)
|
|
319
|
+
(`COPY ... TO 'data.arrows'` / `read_arrow(...)`).
|
|
320
|
+
|
|
269
321
|
## Versioning and DuckDB support
|
|
270
322
|
|
|
271
323
|
The first three digits of the gem version track the DuckDB release that the
|
|
@@ -737,7 +737,6 @@ void rbduckdb_init_aggregate_function(void) {
|
|
|
737
737
|
rb_define_alloc_func(cDuckDBAggregateFunction, allocate);
|
|
738
738
|
rb_define_method(cDuckDBAggregateFunction, "initialize", aggregate_function_initialize, 0);
|
|
739
739
|
rb_define_method(cDuckDBAggregateFunction, "set_name", aggregate_function_set_name, 1);
|
|
740
|
-
rb_define_method(cDuckDBAggregateFunction, "name=", aggregate_function_set_name, 1);
|
|
741
740
|
rb_define_private_method(cDuckDBAggregateFunction, "_set_return_type", aggregate_function__set_return_type, 1);
|
|
742
741
|
rb_define_private_method(cDuckDBAggregateFunction, "_add_parameter", aggregate_function__add_parameter, 1);
|
|
743
742
|
rb_define_private_method(cDuckDBAggregateFunction, "_set_init", aggregate_function__set_init, 0);
|
data/ext/duckdb/appender.c
CHANGED
|
@@ -35,6 +35,7 @@ static VALUE appender__append_time(VALUE self, VALUE hour, VALUE min, VALUE sec,
|
|
|
35
35
|
static VALUE appender__append_timestamp(VALUE self, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
|
|
36
36
|
static VALUE appender__append_hugeint(VALUE self, VALUE lower, VALUE upper);
|
|
37
37
|
static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper);
|
|
38
|
+
static VALUE appender__append_uuid(VALUE self, VALUE val);
|
|
38
39
|
static VALUE appender__append_value(VALUE self, VALUE val);
|
|
39
40
|
static VALUE appender__append_data_chunk(VALUE self, VALUE chunk);
|
|
40
41
|
static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row);
|
|
@@ -453,6 +454,21 @@ static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper) {
|
|
|
453
454
|
return state_to_rbool(duckdb_append_uhugeint(ctx->appender, uhugeint));
|
|
454
455
|
}
|
|
455
456
|
|
|
457
|
+
/* :nodoc: */
|
|
458
|
+
static VALUE appender__append_uuid(VALUE self, VALUE val) {
    /*
     * Appends one UUID to the current appender row.
     * val is converted to a duckdb_uhugeint by rbduckdb_uuid_str_to_uhugeint,
     * wrapped in a temporary duckdb_value, appended, and the temporary is
     * destroyed again before returning. Returns the append state converted
     * by state_to_rbool.
     */
    rubyDuckDBAppender *appender_ctx;
    duckdb_uhugeint parsed;
    duckdb_value boxed;
    duckdb_state rc;

    TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, appender_ctx);

    /* Convert before creating the duckdb_value so nothing is owned yet at this point. */
    rbduckdb_uuid_str_to_uhugeint(val, &parsed);

    boxed = duckdb_create_uuid(parsed);
    rc = duckdb_append_value(appender_ctx->appender, boxed);

    /* The temporary value is destroyed whether or not the append succeeded. */
    duckdb_destroy_value(&boxed);

    return state_to_rbool(rc);
}
|
|
471
|
+
|
|
456
472
|
/* :nodoc: */
|
|
457
473
|
static VALUE appender__append_value(VALUE self, VALUE val) {
|
|
458
474
|
rubyDuckDBAppender *ctx;
|
|
@@ -578,6 +594,7 @@ void rbduckdb_init_appender(void) {
|
|
|
578
594
|
rb_define_private_method(cDuckDBAppender, "_append_timestamp", appender__append_timestamp, 7);
|
|
579
595
|
rb_define_private_method(cDuckDBAppender, "_append_hugeint", appender__append_hugeint, 2);
|
|
580
596
|
rb_define_private_method(cDuckDBAppender, "_append_uhugeint", appender__append_uhugeint, 2);
|
|
597
|
+
rb_define_private_method(cDuckDBAppender, "_append_uuid", appender__append_uuid, 1);
|
|
581
598
|
rb_define_private_method(cDuckDBAppender, "_append_value", appender__append_value, 1);
|
|
582
599
|
rb_define_private_method(cDuckDBAppender, "_append_data_chunk", appender__append_data_chunk, 1);
|
|
583
600
|
rb_define_private_method(cDuckDBAppender, "_append_default_to_chunk", appender__append_default_to_chunk, 3);
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#include "ruby-duckdb.h"
|
|
2
|
+
#include <errno.h>
|
|
3
|
+
|
|
4
|
+
static VALUE cDuckDBArrowArrayStream;
|
|
5
|
+
|
|
6
|
+
typedef struct {
|
|
7
|
+
struct ArrowArrayStream stream;
|
|
8
|
+
} rubyDuckDBArrowArrayStream;
|
|
9
|
+
|
|
10
|
+
/*
|
|
11
|
+
* Heap-allocated context referenced by stream.private_data. Consumers may
|
|
12
|
+
* move the stream struct contents out and keep using the callbacks after
|
|
13
|
+
* the Ruby DuckDB::ArrowArrayStream object is gone, so this context is
|
|
14
|
+
* freed only by the stream release callback, and it holds a reference on
|
|
15
|
+
* the result struct (rbduckdb_result_ref) until then. The release callback
|
|
16
|
+
* must not call any Ruby API: it can run during GC sweep (via deallocate
|
|
17
|
+
* of an unconsumed stream) or from a non-Ruby thread.
|
|
18
|
+
*/
|
|
19
|
+
typedef struct {
|
|
20
|
+
rubyDuckDBResult *presult_ctx;
|
|
21
|
+
duckdb_arrow_options arrow_options;
|
|
22
|
+
char *last_error;
|
|
23
|
+
} arrowArrayStreamContext;
|
|
24
|
+
|
|
25
|
+
static void deallocate(void *ctx);
|
|
26
|
+
static VALUE allocate(VALUE klass);
|
|
27
|
+
static size_t memsize(const void *p);
|
|
28
|
+
static VALUE arrow_array_stream_to_i(VALUE self);
|
|
29
|
+
static VALUE arrow_array_stream_arrow_c_stream(VALUE self);
|
|
30
|
+
static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg);
|
|
31
|
+
static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data);
|
|
32
|
+
static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
|
|
33
|
+
static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out);
|
|
34
|
+
static const char *stream_get_last_error(struct ArrowArrayStream *stream);
|
|
35
|
+
static void stream_release(struct ArrowArrayStream *stream);
|
|
36
|
+
|
|
37
|
+
static const rb_data_type_t arrow_array_stream_data_type = {
|
|
38
|
+
"DuckDB/ArrowArrayStream",
|
|
39
|
+
{NULL, deallocate, memsize,},
|
|
40
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
static void deallocate(void *ctx) {
|
|
44
|
+
rubyDuckDBArrowArrayStream *p = (rubyDuckDBArrowArrayStream *)ctx;
|
|
45
|
+
|
|
46
|
+
if (p->stream.release != NULL) {
|
|
47
|
+
p->stream.release(&(p->stream));
|
|
48
|
+
}
|
|
49
|
+
xfree(p);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
static VALUE allocate(VALUE klass) {
    /* Zero-filled wrapper: the embedded stream starts with release == NULL
     * and private_data == NULL. */
    rubyDuckDBArrowArrayStream *wrapper;

    wrapper = xcalloc((size_t)1, sizeof(*wrapper));
    return TypedData_Wrap_Struct(klass, &arrow_array_stream_data_type, wrapper);
}
|
|
56
|
+
|
|
57
|
+
static size_t memsize(const void *p) {
|
|
58
|
+
return sizeof(rubyDuckDBArrowArrayStream);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/* Context memory is managed with plain malloc/free because the release
|
|
62
|
+
* callback may run outside Ruby's memory bookkeeping. */
|
|
63
|
+
static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg) {
|
|
64
|
+
size_t len = strlen(msg) + 1;
|
|
65
|
+
|
|
66
|
+
free(ctx->last_error);
|
|
67
|
+
ctx->last_error = malloc(len);
|
|
68
|
+
if (ctx->last_error != NULL) {
|
|
69
|
+
memcpy(ctx->last_error, msg, len);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data) {
    /*
     * Translates a DuckDB error-data handle into a stream callback return code.
     * Returns 0 when error_data is NULL or carries no error, EIO otherwise.
     * A non-NULL error_data is always destroyed here; when it carries an error
     * its message is first copied into ctx via stream_set_error.
     */
    int rc = 0;

    if (error_data == NULL) {
        return rc;
    }

    if (duckdb_error_data_has_error(error_data)) {
        /* Copy the message out before the error data is destroyed below. */
        stream_set_error(ctx, duckdb_error_data_message(error_data));
        rc = EIO;
    }

    duckdb_destroy_error_data(&error_data);
    return rc;
}
|
|
85
|
+
|
|
86
|
+
static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
|
|
87
|
+
arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
|
|
88
|
+
duckdb_error_data error_data;
|
|
89
|
+
duckdb_logical_type *types;
|
|
90
|
+
const char **names;
|
|
91
|
+
idx_t column_count;
|
|
92
|
+
idx_t i;
|
|
93
|
+
|
|
94
|
+
column_count = duckdb_column_count(&(ctx->presult_ctx->result));
|
|
95
|
+
types = calloc((size_t)column_count, sizeof(duckdb_logical_type));
|
|
96
|
+
names = calloc((size_t)column_count, sizeof(const char *));
|
|
97
|
+
if (column_count > 0 && (types == NULL || names == NULL)) {
|
|
98
|
+
free(types);
|
|
99
|
+
free(names);
|
|
100
|
+
stream_set_error(ctx, "failed to allocate memory for Arrow schema conversion");
|
|
101
|
+
return ENOMEM;
|
|
102
|
+
}
|
|
103
|
+
for (i = 0; i < column_count; i++) {
|
|
104
|
+
types[i] = duckdb_column_logical_type(&(ctx->presult_ctx->result), i);
|
|
105
|
+
names[i] = duckdb_column_name(&(ctx->presult_ctx->result), i);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
error_data = duckdb_to_arrow_schema(ctx->arrow_options, types, names, column_count, out);
|
|
109
|
+
|
|
110
|
+
for (i = 0; i < column_count; i++) {
|
|
111
|
+
duckdb_destroy_logical_type(&types[i]);
|
|
112
|
+
}
|
|
113
|
+
free(types);
|
|
114
|
+
free(names);
|
|
115
|
+
return stream_check_error(ctx, error_data);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out) {
|
|
119
|
+
arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
|
|
120
|
+
duckdb_data_chunk chunk;
|
|
121
|
+
duckdb_error_data error_data;
|
|
122
|
+
|
|
123
|
+
chunk = duckdb_fetch_chunk(ctx->presult_ctx->result);
|
|
124
|
+
if (chunk == NULL) {
|
|
125
|
+
/* End of stream: a released (release == NULL) array. */
|
|
126
|
+
memset(out, 0, sizeof(struct ArrowArray));
|
|
127
|
+
return 0;
|
|
128
|
+
}
|
|
129
|
+
/* duckdb_data_chunk_to_arrow copies the chunk into Arrow-owned buffers,
|
|
130
|
+
* so the chunk can be destroyed right after conversion. */
|
|
131
|
+
error_data = duckdb_data_chunk_to_arrow(ctx->arrow_options, chunk, out);
|
|
132
|
+
duckdb_destroy_data_chunk(&chunk);
|
|
133
|
+
return stream_check_error(ctx, error_data);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
static const char *stream_get_last_error(struct ArrowArrayStream *stream) {
|
|
137
|
+
arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
|
|
138
|
+
|
|
139
|
+
return ctx == NULL ? NULL : ctx->last_error;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
static void stream_release(struct ArrowArrayStream *stream) {
|
|
143
|
+
arrowArrayStreamContext *ctx;
|
|
144
|
+
|
|
145
|
+
if (stream == NULL || stream->release == NULL) {
|
|
146
|
+
return;
|
|
147
|
+
}
|
|
148
|
+
ctx = (arrowArrayStreamContext *)stream->private_data;
|
|
149
|
+
if (ctx != NULL) {
|
|
150
|
+
rbduckdb_result_unref(ctx->presult_ctx);
|
|
151
|
+
if (ctx->arrow_options != NULL) {
|
|
152
|
+
duckdb_destroy_arrow_options(&(ctx->arrow_options));
|
|
153
|
+
}
|
|
154
|
+
free(ctx->last_error);
|
|
155
|
+
free(ctx);
|
|
156
|
+
}
|
|
157
|
+
stream->private_data = NULL;
|
|
158
|
+
stream->release = NULL;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult) {
    /*
     * Builds a DuckDB::ArrowArrayStream object for the given DuckDB::Result.
     * A malloc-managed context is attached to the stream's private_data; it
     * takes a reference on the result struct (rbduckdb_result_ref) and stores
     * the result's Arrow options. Raises NoMemoryError if the context cannot
     * be allocated.
     */
    VALUE stream_obj;
    rubyDuckDBArrowArrayStream *wrapper;
    rubyDuckDBResult *result_ctx;
    arrowArrayStreamContext *stream_ctx;
    struct ArrowArrayStream *stream;

    stream_obj = allocate(cDuckDBArrowArrayStream);
    TypedData_Get_Struct(stream_obj, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, wrapper);
    result_ctx = rbduckdb_get_struct_result(oDuckDBResult);

    stream_ctx = calloc((size_t)1, sizeof(*stream_ctx));
    if (stream_ctx == NULL) {
        rb_raise(rb_eNoMemError, "failed to allocate ArrowArrayStream context");
    }

    /* The reference is taken only once the context that will drop it exists. */
    rbduckdb_result_ref(result_ctx);
    stream_ctx->presult_ctx = result_ctx;
    stream_ctx->arrow_options = duckdb_result_get_arrow_options(&(result_ctx->result));

    stream = &(wrapper->stream);
    stream->get_schema = stream_get_schema;
    stream->get_next = stream_get_next;
    stream->get_last_error = stream_get_last_error;
    stream->release = stream_release;
    stream->private_data = stream_ctx;

    return stream_obj;
}
|
|
188
|
+
|
|
189
|
+
/*
|
|
190
|
+
* call-seq:
|
|
191
|
+
* stream.to_i -> Integer
|
|
192
|
+
*
|
|
193
|
+
* Returns the address of the underlying C <code>struct ArrowArrayStream</code>.
|
|
194
|
+
* Arrow consumers such as red-arrow accept this address directly:
|
|
195
|
+
*
|
|
196
|
+
* reader = Arrow::RecordBatchReader.import(stream.to_i)
|
|
197
|
+
*/
|
|
198
|
+
static VALUE arrow_array_stream_to_i(VALUE self) {
    rubyDuckDBArrowArrayStream *wrapper;
    uintptr_t address;

    TypedData_Get_Struct(self, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, wrapper);

    /* The address is that of the struct embedded in the wrapper owned by self. */
    address = (uintptr_t)&(wrapper->stream);
    return ULL2NUM((unsigned long long)address);
}
|
|
204
|
+
|
|
205
|
+
/*
|
|
206
|
+
* call-seq:
|
|
207
|
+
* stream.arrow_c_stream -> self
|
|
208
|
+
*
|
|
209
|
+
* Returns self. Defined so that the stream object itself satisfies the
|
|
210
|
+
* Arrow C stream protocol used by ruby-polars and others.
|
|
211
|
+
*/
|
|
212
|
+
static VALUE arrow_array_stream_arrow_c_stream(VALUE self) {
|
|
213
|
+
return self;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
void rbduckdb_init_arrow_array_stream(void) {
|
|
217
|
+
#if 0
|
|
218
|
+
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
219
|
+
#endif
|
|
220
|
+
cDuckDBArrowArrayStream = rb_define_class_under(mDuckDB, "ArrowArrayStream", rb_cObject);
|
|
221
|
+
|
|
222
|
+
rb_define_alloc_func(cDuckDBArrowArrayStream, allocate);
|
|
223
|
+
|
|
224
|
+
rb_define_method(cDuckDBArrowArrayStream, "to_i", arrow_array_stream_to_i, 0);
|
|
225
|
+
rb_define_method(cDuckDBArrowArrayStream, "arrow_c_stream", arrow_array_stream_arrow_c_stream, 0);
|
|
226
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#ifndef RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
|
|
2
|
+
#define RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
|
|
3
|
+
|
|
4
|
+
/*
|
|
5
|
+
* Canonical Arrow C Data Interface and Arrow C Stream Interface definitions.
|
|
6
|
+
* https://arrow.apache.org/docs/format/CDataInterface.html
|
|
7
|
+
* duckdb.h only forward-declares these structs.
|
|
8
|
+
*/
|
|
9
|
+
#ifndef ARROW_C_DATA_INTERFACE
|
|
10
|
+
#define ARROW_C_DATA_INTERFACE
|
|
11
|
+
|
|
12
|
+
#define ARROW_FLAG_DICTIONARY_ORDERED 1
|
|
13
|
+
#define ARROW_FLAG_NULLABLE 2
|
|
14
|
+
#define ARROW_FLAG_MAP_KEYS_SORTED 4
|
|
15
|
+
|
|
16
|
+
struct ArrowSchema {
    /* Type description. */
    const char *format;
    const char *name;
    const char *metadata;
    int64_t flags; /* combination of the ARROW_FLAG_* values defined above */

    /* Nested schemas. */
    int64_t n_children;
    struct ArrowSchema **children;
    struct ArrowSchema *dictionary;

    /* Release callback and producer-private state. */
    void (*release)(struct ArrowSchema *);
    void *private_data;
};
|
|
28
|
+
|
|
29
|
+
struct ArrowArray {
    /* Array data description. */
    int64_t length;
    int64_t null_count;
    int64_t offset;
    int64_t n_buffers;
    int64_t n_children;
    const void **buffers;
    struct ArrowArray **children;
    struct ArrowArray *dictionary;

    /* Release callback (NULL marks a released array) and producer-private state. */
    void (*release)(struct ArrowArray *);
    void *private_data;
};
|
|
42
|
+
|
|
43
|
+
#endif /* ARROW_C_DATA_INTERFACE */
|
|
44
|
+
|
|
45
|
+
#ifndef ARROW_C_STREAM_INTERFACE
|
|
46
|
+
#define ARROW_C_STREAM_INTERFACE
|
|
47
|
+
|
|
48
|
+
struct ArrowArrayStream {
    /* Callbacks returning int report 0 on success and a non-zero code on failure. */
    int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out);
    int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out);

    /* Message describing the last failure, or NULL when none is available. */
    const char *(*get_last_error)(struct ArrowArrayStream *);

    /* Release callback (NULL marks a released stream) and producer-private state. */
    void (*release)(struct ArrowArrayStream *);
    void *private_data;
};
|
|
55
|
+
|
|
56
|
+
#endif /* ARROW_C_STREAM_INTERFACE */
|
|
57
|
+
|
|
58
|
+
void rbduckdb_init_arrow_array_stream(void);
|
|
59
|
+
VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult);
|
|
60
|
+
|
|
61
|
+
#endif
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#include "ruby-duckdb.h"
|
|
2
|
+
|
|
3
|
+
/*
|
|
4
|
+
* Internal helpers backing DuckDB::Connection#append_arrow. They consume an
|
|
5
|
+
* Arrow producer's struct ArrowArrayStream (given by its address) and convert
|
|
6
|
+
* each chunk into a DuckDB::DataChunk using DuckDB's unstable Arrow C API. The
|
|
7
|
+
* Ruby layer owns the loop, the appender lifecycle, and error handling; these
|
|
8
|
+
* primitives only do the raw-pointer / C-API work.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
static VALUE cDuckDBArrowConvertedSchema;
|
|
12
|
+
|
|
13
|
+
typedef struct {
|
|
14
|
+
duckdb_arrow_converted_schema converted_schema;
|
|
15
|
+
} rubyDuckDBArrowConvertedSchema;
|
|
16
|
+
|
|
17
|
+
static void deallocate(void *ctx);
|
|
18
|
+
static VALUE allocate(VALUE klass);
|
|
19
|
+
static size_t memsize(const void *p);
|
|
20
|
+
static void raise_error_data(duckdb_error_data error_data);
|
|
21
|
+
|
|
22
|
+
static VALUE connection__arrow_converted_schema(VALUE self, VALUE address);
|
|
23
|
+
static VALUE connection__arrow_next_chunk(VALUE self, VALUE address, VALUE converted);
|
|
24
|
+
static VALUE connection__arrow_release(VALUE self, VALUE address);
|
|
25
|
+
|
|
26
|
+
static const rb_data_type_t arrow_converted_schema_data_type = {
|
|
27
|
+
"DuckDB/ArrowConvertedSchema",
|
|
28
|
+
{NULL, deallocate, memsize,},
|
|
29
|
+
0, 0, RUBY_TYPED_FREE_IMMEDIATELY
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
static void deallocate(void *ctx) {
|
|
33
|
+
rubyDuckDBArrowConvertedSchema *p = (rubyDuckDBArrowConvertedSchema *)ctx;
|
|
34
|
+
|
|
35
|
+
if (p->converted_schema) {
|
|
36
|
+
duckdb_destroy_arrow_converted_schema(&(p->converted_schema));
|
|
37
|
+
}
|
|
38
|
+
xfree(p);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
static VALUE allocate(VALUE klass) {
    /* Zero-filled wrapper: converted_schema starts out NULL until a
     * conversion stores one. */
    rubyDuckDBArrowConvertedSchema *wrapper;

    wrapper = xcalloc((size_t)1, sizeof(*wrapper));
    return TypedData_Wrap_Struct(klass, &arrow_converted_schema_data_type, wrapper);
}
|
|
45
|
+
|
|
46
|
+
static size_t memsize(const void *p) {
|
|
47
|
+
return sizeof(rubyDuckDBArrowConvertedSchema);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
static void raise_error_data(duckdb_error_data error_data) {
    /*
     * Raises DuckDB::Error with the message carried by error_data, if any.
     * Returns normally when error_data is NULL or carries no error. A
     * non-NULL error_data is destroyed on every path; on the raising path the
     * message is copied into a Ruby String first.
     */
    VALUE text;

    if (error_data == NULL) {
        return;
    }

    if (duckdb_error_data_has_error(error_data)) {
        text = rb_str_new_cstr(duckdb_error_data_message(error_data));
        duckdb_destroy_error_data(&error_data);
        rb_raise(eDuckDBError, "%s", StringValueCStr(text));
    }

    /* Reached only when no error was reported (rb_raise does not return). */
    duckdb_destroy_error_data(&error_data);
}
|
|
64
|
+
|
|
65
|
+
static struct ArrowArrayStream *stream_from_address(VALUE address) {
    /* Converts a Ruby Integer holding a raw address back into a stream pointer. */
    unsigned long long raw = NUM2ULL(address);

    return (struct ArrowArrayStream *)(uintptr_t)raw;
}
|
|
68
|
+
|
|
69
|
+
/* :nodoc: */
|
|
70
|
+
static VALUE connection__arrow_converted_schema(VALUE self, VALUE address) {
    /*
     * Reads the Arrow schema from the stream at `address` and converts it to a
     * DuckDB converted schema, returned wrapped in a
     * DuckDB::ArrowConvertedSchema object.
     *
     * Raises DuckDB::Error when the address is NULL, when the stream's
     * get_schema callback fails, or when DuckDB reports a conversion error.
     *
     * The Ruby wrapper is allocated before the native schema is created and
     * DuckDB writes the schema directly into it. This way a failing Ruby
     * allocation can no longer leak an already converted schema, and any
     * schema stored before a later raise is destroyed by the wrapper's free
     * function.
     */
    rubyDuckDBConnection *ctx;
    rubyDuckDBArrowConvertedSchema *schema_ctx;
    struct ArrowArrayStream *stream;
    struct ArrowSchema schema;
    duckdb_error_data error_data;
    VALUE obj;
    int rc;

    ctx = rbduckdb_get_struct_connection(self);
    stream = stream_from_address(address);
    if (stream == NULL) {
        rb_raise(eDuckDBError, "Arrow producer returned a NULL stream");
    }

    /* allocate() zero-fills the wrapper, so converted_schema starts as NULL. */
    obj = allocate(cDuckDBArrowConvertedSchema);
    TypedData_Get_Struct(obj, rubyDuckDBArrowConvertedSchema, &arrow_converted_schema_data_type, schema_ctx);

    memset(&schema, 0, sizeof(schema));
    rc = stream->get_schema(stream, &schema);
    if (rc != 0) {
        const char *err = stream->get_last_error(stream);
        rb_raise(eDuckDBError, "failed to get Arrow schema: %s", err ? err : "unknown error");
    }

    error_data = duckdb_schema_from_arrow(ctx->con, &schema, &(schema_ctx->converted_schema));

    /* The Arrow schema is only needed for the conversion; release it before
     * a possible raise below. */
    if (schema.release != NULL) {
        schema.release(&schema);
    }
    raise_error_data(error_data);

    return obj;
}
|
|
104
|
+
|
|
105
|
+
/* :nodoc: */
|
|
106
|
+
static VALUE connection__arrow_next_chunk(VALUE self, VALUE address, VALUE converted) {
    /*
     * Pulls the next Arrow array from the stream at `address` and converts it
     * into a DuckDB::DataChunk using the given DuckDB::ArrowConvertedSchema.
     * Returns nil at end of stream.
     *
     * Raises DuckDB::Error when the address is NULL, when `converted` holds no
     * converted schema, when the stream's get_next callback fails, or when
     * DuckDB reports a conversion error.
     *
     * Both argument checks run before get_next is called, so no array is
     * pulled from the producer when the call cannot succeed anyway.
     */
    rubyDuckDBConnection *ctx;
    rubyDuckDBArrowConvertedSchema *schema_ctx;
    struct ArrowArrayStream *stream;
    struct ArrowArray array;
    duckdb_data_chunk chunk = NULL;
    duckdb_error_data error_data;
    int rc;

    ctx = rbduckdb_get_struct_connection(self);
    TypedData_Get_Struct(converted, rubyDuckDBArrowConvertedSchema, &arrow_converted_schema_data_type, schema_ctx);
    stream = stream_from_address(address);

    /* Same guard as the schema and release helpers: address 0 is not a stream. */
    if (stream == NULL) {
        rb_raise(eDuckDBError, "Arrow producer returned a NULL stream");
    }
    /* A wrapper created through the class allocator alone carries no schema. */
    if (schema_ctx->converted_schema == NULL) {
        rb_raise(eDuckDBError, "Arrow converted schema is not initialized");
    }

    memset(&array, 0, sizeof(array));
    rc = stream->get_next(stream, &array);
    if (rc != 0) {
        const char *err = stream->get_last_error(stream);
        rb_raise(eDuckDBError, "failed to get next Arrow chunk: %s", err ? err : "unknown error");
    }
    /* End of stream: a released array (release == NULL). */
    if (array.release == NULL) {
        return Qnil;
    }

    /* duckdb_data_chunk_from_arrow takes ownership of the array (nulls its
     * release). On error before that, we still own it and must release it. */
    error_data = duckdb_data_chunk_from_arrow(ctx->con, &array, schema_ctx->converted_schema, &chunk);
    if (error_data != NULL && duckdb_error_data_has_error(error_data)) {
        if (array.release != NULL) {
            array.release(&array);
        }
        raise_error_data(error_data);
    } else if (error_data != NULL) {
        duckdb_destroy_error_data(&error_data);
    }

    return rbduckdb_create_data_chunk(chunk, true);
}
|
|
144
|
+
|
|
145
|
+
/* :nodoc: */
|
|
146
|
+
static VALUE connection__arrow_release(VALUE self, VALUE address) {
    /* Invokes the release callback of the stream at `address`, unless the
     * address is NULL or the stream is already released. Always returns nil. */
    struct ArrowArrayStream *stream = stream_from_address(address);

    if (stream == NULL || stream->release == NULL) {
        return Qnil;
    }
    stream->release(stream);
    return Qnil;
}
|
|
154
|
+
|
|
155
|
+
void rbduckdb_init_arrow_import(void) {
|
|
156
|
+
#if 0
|
|
157
|
+
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
158
|
+
#endif
|
|
159
|
+
cDuckDBArrowConvertedSchema = rb_define_class_under(mDuckDB, "ArrowConvertedSchema", rb_cObject);
|
|
160
|
+
rb_define_alloc_func(cDuckDBArrowConvertedSchema, allocate);
|
|
161
|
+
|
|
162
|
+
rb_define_private_method(cDuckDBConnection, "_arrow_converted_schema", connection__arrow_converted_schema, 1);
|
|
163
|
+
rb_define_private_method(cDuckDBConnection, "_arrow_next_chunk", connection__arrow_next_chunk, 2);
|
|
164
|
+
rb_define_private_method(cDuckDBConnection, "_arrow_release", connection__arrow_release, 1);
|
|
165
|
+
}
|
data/ext/duckdb/blob.c
CHANGED
data/ext/duckdb/blob.h
CHANGED
data/ext/duckdb/config.c
CHANGED
data/ext/duckdb/config.h
CHANGED
data/ext/duckdb/connection.c
CHANGED
|
@@ -231,7 +231,7 @@ static VALUE connection__register_scalar_function(VALUE self, VALUE scalar_funct
|
|
|
231
231
|
duckdb_state state;
|
|
232
232
|
|
|
233
233
|
ctxcon = rbduckdb_get_struct_connection(self);
|
|
234
|
-
ctxsf =
|
|
234
|
+
ctxsf = rbduckdb_get_struct_scalar_function(scalar_function);
|
|
235
235
|
|
|
236
236
|
state = duckdb_register_scalar_function(ctxcon->con, ctxsf->scalar_function);
|
|
237
237
|
|
|
@@ -252,7 +252,7 @@ static VALUE connection__register_scalar_function_set(VALUE self, VALUE scalar_f
|
|
|
252
252
|
duckdb_state state;
|
|
253
253
|
|
|
254
254
|
ctxcon = rbduckdb_get_struct_connection(self);
|
|
255
|
-
ctxsfs =
|
|
255
|
+
ctxsfs = rbduckdb_get_struct_scalar_function_set(scalar_function_set);
|
|
256
256
|
|
|
257
257
|
state = duckdb_register_scalar_function_set(ctxcon->con, ctxsfs->scalar_function_set);
|
|
258
258
|
|
|
@@ -314,7 +314,7 @@ static VALUE connection__register_table_function(VALUE self, VALUE table_functio
|
|
|
314
314
|
duckdb_state state;
|
|
315
315
|
|
|
316
316
|
ctxcon = rbduckdb_get_struct_connection(self);
|
|
317
|
-
ctxtf =
|
|
317
|
+
ctxtf = rbduckdb_get_struct_table_function(table_function);
|
|
318
318
|
|
|
319
319
|
state = duckdb_register_table_function(ctxcon->con, ctxtf->table_function);
|
|
320
320
|
|
data/ext/duckdb/converter.h
CHANGED
|
@@ -19,6 +19,7 @@ extern ID id__decimal_to_unscaled;
|
|
|
19
19
|
VALUE rbduckdb_uuid_to_ruby(duckdb_hugeint h);
|
|
20
20
|
VALUE rbduckdb_uuid_uhugeint_to_ruby(duckdb_uhugeint h);
|
|
21
21
|
void rbduckdb_uuid_str_to_hugeint(VALUE uuid_str, duckdb_hugeint *out);
|
|
22
|
+
void rbduckdb_uuid_str_to_uhugeint(VALUE uuid_str, duckdb_uhugeint *out);
|
|
22
23
|
VALUE rbduckdb_interval_to_ruby(duckdb_interval i);
|
|
23
24
|
VALUE rbduckdb_hugeint_to_ruby(duckdb_hugeint h);
|
|
24
25
|
VALUE rbduckdb_uhugeint_to_ruby(duckdb_uhugeint h);
|