duckdb 1.5.3.0 → 1.5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +52 -0
- data/ext/duckdb/aggregate_function.c +0 -1
- data/ext/duckdb/appender.c +17 -0
- data/ext/duckdb/arrow_array_stream.c +226 -0
- data/ext/duckdb/arrow_array_stream.h +61 -0
- data/ext/duckdb/arrow_import.c +165 -0
- data/ext/duckdb/arrow_import.h +6 -0
- data/ext/duckdb/blob.c +1 -1
- data/ext/duckdb/blob.h +1 -2
- data/ext/duckdb/config.c +1 -1
- data/ext/duckdb/config.h +1 -1
- data/ext/duckdb/connection.c +3 -3
- data/ext/duckdb/converter.h +1 -0
- data/ext/duckdb/conveter.c +39 -9
- data/ext/duckdb/data_chunk.c +10 -0
- data/ext/duckdb/data_chunk.h +1 -0
- data/ext/duckdb/duckdb.c +13 -11
- data/ext/duckdb/error.c +1 -1
- data/ext/duckdb/error.h +1 -3
- data/ext/duckdb/function_executor.c +308 -2
- data/ext/duckdb/function_executor.h +44 -0
- data/ext/duckdb/prepared_statement.c +21 -0
- data/ext/duckdb/result.c +49 -3
- data/ext/duckdb/result.h +11 -0
- data/ext/duckdb/ruby-duckdb.h +3 -0
- data/ext/duckdb/scalar_function.c +97 -29
- data/ext/duckdb/scalar_function.h +2 -4
- data/ext/duckdb/scalar_function_bind_info.c +13 -13
- data/ext/duckdb/scalar_function_bind_info.h +1 -1
- data/ext/duckdb/scalar_function_set.c +9 -9
- data/ext/duckdb/scalar_function_set.h +2 -2
- data/ext/duckdb/table_description.c +19 -19
- data/ext/duckdb/table_description.h +1 -1
- data/ext/duckdb/table_function.c +94 -28
- data/ext/duckdb/table_function.h +2 -2
- data/ext/duckdb/table_function_bind_info.c +20 -20
- data/ext/duckdb/table_function_bind_info.h +2 -2
- data/ext/duckdb/table_function_function_info.c +5 -5
- data/ext/duckdb/table_function_function_info.h +2 -2
- data/ext/duckdb/table_function_init_info.c +70 -5
- data/ext/duckdb/table_function_init_info.h +2 -2
- data/lib/duckdb/appender.rb +23 -0
- data/lib/duckdb/arrow_array_stream.rb +33 -0
- data/lib/duckdb/connection.rb +54 -0
- data/lib/duckdb/prepared_statement.rb +17 -0
- data/lib/duckdb/version.rb +1 -1
- data/lib/duckdb.rb +1 -0
- metadata +6 -1
data/ext/duckdb/table_function.c
CHANGED
|
@@ -11,16 +11,21 @@ static void deallocate(void *ctx);
|
|
|
11
11
|
static VALUE allocate(VALUE klass);
|
|
12
12
|
static size_t memsize(const void *p);
|
|
13
13
|
static void compact(void *ctx);
|
|
14
|
-
static VALUE
|
|
15
|
-
static VALUE
|
|
16
|
-
static VALUE
|
|
17
|
-
static VALUE
|
|
18
|
-
static VALUE
|
|
14
|
+
static VALUE table_function_initialize(VALUE self);
|
|
15
|
+
static VALUE table_function_set_name(VALUE self, VALUE name);
|
|
16
|
+
static VALUE table_function_add_parameter(VALUE self, VALUE logical_type);
|
|
17
|
+
static VALUE table_function_add_named_parameter(VALUE self, VALUE name, VALUE logical_type);
|
|
18
|
+
static VALUE table_function_bind(VALUE self);
|
|
19
19
|
static void table_function_bind_callback(duckdb_bind_info info);
|
|
20
|
-
static VALUE
|
|
20
|
+
static VALUE table_function_init(VALUE self);
|
|
21
21
|
static void table_function_init_callback(duckdb_init_info info);
|
|
22
|
-
static VALUE
|
|
22
|
+
static VALUE table_function_execute(VALUE self);
|
|
23
23
|
static void table_function_execute_callback(duckdb_function_info info, duckdb_data_chunk output);
|
|
24
|
+
#ifdef HAVE_DUCKDB_H_GE_V1_5_0
|
|
25
|
+
/* Thread detection (declared in function_executor.c); used to skip the proxy on Ruby threads. */
|
|
26
|
+
extern int ruby_native_thread_p(void);
|
|
27
|
+
static void table_function_local_init_callback(duckdb_init_info info);
|
|
28
|
+
#endif
|
|
24
29
|
|
|
25
30
|
static const rb_data_type_t table_function_data_type = {
|
|
26
31
|
"DuckDB/TableFunction",
|
|
@@ -84,7 +89,7 @@ static size_t memsize(const void *p) {
|
|
|
84
89
|
* tf.name = "my_function"
|
|
85
90
|
* # ... configure tf ...
|
|
86
91
|
*/
|
|
87
|
-
static VALUE
|
|
92
|
+
static VALUE table_function_initialize(VALUE self) {
|
|
88
93
|
rubyDuckDBTableFunction *ctx;
|
|
89
94
|
|
|
90
95
|
TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, ctx);
|
|
@@ -113,7 +118,7 @@ static VALUE duckdb_table_function_initialize(VALUE self) {
|
|
|
113
118
|
*
|
|
114
119
|
* tf.name = "my_function"
|
|
115
120
|
*/
|
|
116
|
-
static VALUE
|
|
121
|
+
static VALUE table_function_set_name(VALUE self, VALUE name) {
|
|
117
122
|
rubyDuckDBTableFunction *ctx;
|
|
118
123
|
const char *func_name;
|
|
119
124
|
|
|
@@ -138,7 +143,7 @@ static VALUE rbduckdb_table_function_set_name(VALUE self, VALUE name) {
|
|
|
138
143
|
* tf.add_parameter(DuckDB::LogicalType::BIGINT)
|
|
139
144
|
* tf.add_parameter(DuckDB::LogicalType::VARCHAR)
|
|
140
145
|
*/
|
|
141
|
-
static VALUE
|
|
146
|
+
static VALUE table_function_add_parameter(VALUE self, VALUE logical_type) {
|
|
142
147
|
rubyDuckDBTableFunction *ctx;
|
|
143
148
|
rubyDuckDBLogicalType *ctx_logical_type;
|
|
144
149
|
|
|
@@ -162,7 +167,7 @@ static VALUE rbduckdb_table_function_add_parameter(VALUE self, VALUE logical_typ
|
|
|
162
167
|
*
|
|
163
168
|
* tf.add_named_parameter("limit", DuckDB::LogicalType::BIGINT)
|
|
164
169
|
*/
|
|
165
|
-
static VALUE
|
|
170
|
+
static VALUE table_function_add_named_parameter(VALUE self, VALUE name, VALUE logical_type) {
|
|
166
171
|
rubyDuckDBTableFunction *ctx;
|
|
167
172
|
rubyDuckDBLogicalType *ctx_logical_type;
|
|
168
173
|
const char *param_name;
|
|
@@ -192,7 +197,7 @@ static VALUE rbduckdb_table_function_add_named_parameter(VALUE self, VALUE name,
|
|
|
192
197
|
* bind_info.add_result_column('name', DuckDB::LogicalType::VARCHAR)
|
|
193
198
|
* end
|
|
194
199
|
*/
|
|
195
|
-
static VALUE
|
|
200
|
+
static VALUE table_function_bind(VALUE self) {
|
|
196
201
|
rubyDuckDBTableFunction *ctx;
|
|
197
202
|
|
|
198
203
|
if (!rb_block_given_p()) {
|
|
@@ -231,7 +236,7 @@ static void execute_bind_callback_protected(void *user_data) {
|
|
|
231
236
|
int state = 0;
|
|
232
237
|
|
|
233
238
|
bind_info_obj = rb_class_new_instance(0, NULL, cDuckDBTableFunctionBindInfo);
|
|
234
|
-
bind_info_ctx =
|
|
239
|
+
bind_info_ctx = rbduckdb_get_struct_bind_info(bind_info_obj);
|
|
235
240
|
bind_info_ctx->bind_info = darg->info;
|
|
236
241
|
|
|
237
242
|
VALUE call_args[2] = { darg->ctx->bind_proc, bind_info_obj };
|
|
@@ -269,7 +274,7 @@ static void table_function_bind_callback(duckdb_bind_info info) {
|
|
|
269
274
|
* # Initialize execution state
|
|
270
275
|
* end
|
|
271
276
|
*/
|
|
272
|
-
static VALUE
|
|
277
|
+
static VALUE table_function_init(VALUE self) {
|
|
273
278
|
rubyDuckDBTableFunction *ctx;
|
|
274
279
|
|
|
275
280
|
if (!rb_block_given_p()) {
|
|
@@ -307,7 +312,7 @@ static void execute_init_callback_protected(void *user_data) {
|
|
|
307
312
|
int state = 0;
|
|
308
313
|
|
|
309
314
|
init_info_obj = rb_class_new_instance(0, NULL, cDuckDBTableFunctionInitInfo);
|
|
310
|
-
init_info_ctx =
|
|
315
|
+
init_info_ctx = rbduckdb_get_struct_init_info(init_info_obj);
|
|
311
316
|
init_info_ctx->info = darg->info;
|
|
312
317
|
|
|
313
318
|
VALUE call_args[2] = { darg->ctx->init_proc, init_info_obj };
|
|
@@ -347,7 +352,7 @@ static void table_function_init_callback(duckdb_init_info info) {
|
|
|
347
352
|
* # Write data...
|
|
348
353
|
* end
|
|
349
354
|
*/
|
|
350
|
-
static VALUE
|
|
355
|
+
static VALUE table_function_execute(VALUE self) {
|
|
351
356
|
rubyDuckDBTableFunction *ctx;
|
|
352
357
|
|
|
353
358
|
if (!rb_block_given_p()) {
|
|
@@ -358,6 +363,10 @@ static VALUE rbduckdb_table_function_set_execute(VALUE self) {
|
|
|
358
363
|
|
|
359
364
|
ctx->execute_proc = rb_block_proc();
|
|
360
365
|
duckdb_table_function_set_function(ctx->table_function, table_function_execute_callback);
|
|
366
|
+
#ifdef HAVE_DUCKDB_H_GE_V1_5_0
|
|
367
|
+
/* Per-worker proxy threads for the execute path (DuckDB >= 1.5.0). */
|
|
368
|
+
duckdb_table_function_set_local_init(ctx->table_function, table_function_local_init_callback);
|
|
369
|
+
#endif
|
|
361
370
|
|
|
362
371
|
rbduckdb_function_executor_ensure_started();
|
|
363
372
|
|
|
@@ -384,7 +393,7 @@ static void execute_execute_callback_protected(void *user_data) {
|
|
|
384
393
|
int state = 0;
|
|
385
394
|
|
|
386
395
|
func_info_obj = rb_class_new_instance(0, NULL, cDuckDBTableFunctionFunctionInfo);
|
|
387
|
-
func_info_ctx =
|
|
396
|
+
func_info_ctx = rbduckdb_get_struct_function_info(func_info_obj);
|
|
388
397
|
func_info_ctx->info = darg->info;
|
|
389
398
|
|
|
390
399
|
data_chunk_obj = rb_class_new_instance(0, NULL, cDuckDBDataChunk);
|
|
@@ -405,6 +414,7 @@ static void execute_execute_callback_protected(void *user_data) {
|
|
|
405
414
|
static void table_function_execute_callback(duckdb_function_info info, duckdb_data_chunk output) {
|
|
406
415
|
rubyDuckDBTableFunction *ctx;
|
|
407
416
|
struct execute_dispatch_arg darg;
|
|
417
|
+
struct worker_proxy *proxy = NULL;
|
|
408
418
|
|
|
409
419
|
ctx = (rubyDuckDBTableFunction *)duckdb_function_get_extra_info(info);
|
|
410
420
|
if (!ctx || ctx->execute_proc == Qnil) return;
|
|
@@ -413,28 +423,84 @@ static void table_function_execute_callback(duckdb_function_info info, duckdb_da
|
|
|
413
423
|
darg.info = info;
|
|
414
424
|
darg.output = output;
|
|
415
425
|
|
|
416
|
-
|
|
426
|
+
#ifdef HAVE_DUCKDB_H_GE_V1_5_0
|
|
427
|
+
/* On DuckDB >= 1.5.0 each worker thread carries its own proxy (see local_init). */
|
|
428
|
+
proxy = (struct worker_proxy *)duckdb_function_get_local_init_data(info);
|
|
429
|
+
#endif
|
|
430
|
+
rbduckdb_function_executor_dispatch_via_proxy(execute_execute_callback_protected, &darg, proxy);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
#ifdef HAVE_DUCKDB_H_GE_V1_5_0
|
|
434
|
+
/*
|
|
435
|
+
* Per-worker init for the execute path (DuckDB >= 1.5.0).
|
|
436
|
+
*
|
|
437
|
+
* DuckDB calls this once on each worker thread that will run the execute
|
|
438
|
+
* callback. We create a per-worker proxy (allocating its Ruby thread under the
|
|
439
|
+
* GVL via the global executor, since this runs on a non-Ruby thread) and store
|
|
440
|
+
* it as thread-local init data. The execute callback then dispatches through it
|
|
441
|
+
* instead of the shared global executor, so workers run callbacks concurrently.
|
|
442
|
+
* DuckDB invokes rbduckdb_worker_proxy_destroy when the local state is freed.
|
|
443
|
+
*/
|
|
444
|
+
struct create_proxy_callback_arg {
|
|
445
|
+
struct worker_proxy *proxy;
|
|
446
|
+
};
|
|
447
|
+
|
|
448
|
+
static VALUE create_proxy_callback(VALUE varg) {
|
|
449
|
+
struct create_proxy_callback_arg *arg = (struct create_proxy_callback_arg *)varg;
|
|
450
|
+
arg->proxy = rbduckdb_worker_proxy_create();
|
|
451
|
+
return Qnil;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
/*
|
|
455
|
+
* rbduckdb_worker_proxy_create may raise (NoMemoryError, Thread.new failure),
|
|
456
|
+
* and the executor runs callbacks unprotected — a raise would longjmp past
|
|
457
|
+
* its done-signaling and block the waiting DuckDB worker forever. Swallow
|
|
458
|
+
* the exception instead: the proxy stays NULL, local_init sets no state, and
|
|
459
|
+
* the execute callback falls back to the global executor.
|
|
460
|
+
*/
|
|
461
|
+
static void create_proxy_callback_protected(void *user_data) {
|
|
462
|
+
int exception_state;
|
|
463
|
+
|
|
464
|
+
rb_protect(create_proxy_callback, (VALUE)user_data, &exception_state);
|
|
465
|
+
if (exception_state) {
|
|
466
|
+
rb_set_errinfo(Qnil);
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
static void table_function_local_init_callback(duckdb_init_info info) {
|
|
471
|
+
struct create_proxy_callback_arg arg;
|
|
472
|
+
|
|
473
|
+
/* A Ruby calling thread runs the callback inline (Case 1/2); no proxy needed. */
|
|
474
|
+
if (ruby_native_thread_p()) return;
|
|
475
|
+
|
|
476
|
+
arg.proxy = NULL;
|
|
477
|
+
rbduckdb_function_executor_dispatch(create_proxy_callback_protected, &arg);
|
|
478
|
+
|
|
479
|
+
if (arg.proxy != NULL) {
|
|
480
|
+
duckdb_init_set_init_data(info, arg.proxy, rbduckdb_worker_proxy_destroy);
|
|
481
|
+
}
|
|
417
482
|
}
|
|
483
|
+
#endif
|
|
418
484
|
|
|
419
|
-
rubyDuckDBTableFunction *
|
|
485
|
+
rubyDuckDBTableFunction *rbduckdb_get_struct_table_function(VALUE self) {
|
|
420
486
|
rubyDuckDBTableFunction *ctx;
|
|
421
487
|
TypedData_Get_Struct(self, rubyDuckDBTableFunction, &table_function_data_type, ctx);
|
|
422
488
|
return ctx;
|
|
423
489
|
}
|
|
424
490
|
|
|
425
|
-
void
|
|
491
|
+
void rbduckdb_init_table_function(void) {
|
|
426
492
|
#if 0
|
|
427
493
|
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
428
494
|
#endif
|
|
429
495
|
cDuckDBTableFunction = rb_define_class_under(mDuckDB, "TableFunction", rb_cObject);
|
|
430
496
|
rb_define_alloc_func(cDuckDBTableFunction, allocate);
|
|
431
497
|
|
|
432
|
-
rb_define_method(cDuckDBTableFunction, "initialize",
|
|
433
|
-
rb_define_method(cDuckDBTableFunction, "set_name",
|
|
434
|
-
rb_define_method(cDuckDBTableFunction, "name=",
|
|
435
|
-
rb_define_method(cDuckDBTableFunction, "add_parameter",
|
|
436
|
-
rb_define_method(cDuckDBTableFunction, "add_named_parameter",
|
|
437
|
-
rb_define_method(cDuckDBTableFunction, "bind",
|
|
438
|
-
rb_define_method(cDuckDBTableFunction, "init",
|
|
439
|
-
rb_define_method(cDuckDBTableFunction, "execute",
|
|
498
|
+
rb_define_method(cDuckDBTableFunction, "initialize", table_function_initialize, 0);
|
|
499
|
+
rb_define_method(cDuckDBTableFunction, "set_name", table_function_set_name, 1);
|
|
500
|
+
rb_define_method(cDuckDBTableFunction, "name=", table_function_set_name, 1);
|
|
501
|
+
rb_define_method(cDuckDBTableFunction, "add_parameter", table_function_add_parameter, 1);
|
|
502
|
+
rb_define_method(cDuckDBTableFunction, "add_named_parameter", table_function_add_named_parameter, 2);
|
|
503
|
+
rb_define_method(cDuckDBTableFunction, "bind", table_function_bind, 0);
|
|
504
|
+
rb_define_method(cDuckDBTableFunction, "init", table_function_init, 0);
|
|
505
|
+
rb_define_method(cDuckDBTableFunction, "execute", table_function_execute, 0);
|
|
440
506
|
}
|
data/ext/duckdb/table_function.h
CHANGED
|
@@ -11,7 +11,7 @@ struct _rubyDuckDBTableFunction {
|
|
|
11
11
|
typedef struct _rubyDuckDBTableFunction rubyDuckDBTableFunction;
|
|
12
12
|
|
|
13
13
|
extern VALUE cDuckDBTableFunction;
|
|
14
|
-
rubyDuckDBTableFunction *
|
|
15
|
-
void
|
|
14
|
+
rubyDuckDBTableFunction *rbduckdb_get_struct_table_function(VALUE self);
|
|
15
|
+
void rbduckdb_init_table_function(void);
|
|
16
16
|
|
|
17
17
|
#endif
|
|
@@ -5,12 +5,12 @@ VALUE cDuckDBTableFunctionBindInfo;
|
|
|
5
5
|
static void deallocate(void *ctx);
|
|
6
6
|
static VALUE allocate(VALUE klass);
|
|
7
7
|
static size_t memsize(const void *p);
|
|
8
|
-
static VALUE
|
|
9
|
-
static VALUE
|
|
10
|
-
static VALUE
|
|
11
|
-
static VALUE
|
|
12
|
-
static VALUE
|
|
13
|
-
static VALUE
|
|
8
|
+
static VALUE table_function_bind_info_parameter_count(VALUE self);
|
|
9
|
+
static VALUE table_function_bind_info_get_parameter(VALUE self, VALUE index);
|
|
10
|
+
static VALUE table_function_bind_info_get_named_parameter(VALUE self, VALUE name);
|
|
11
|
+
static VALUE table_function_bind_info__add_result_column(VALUE self, VALUE column_name, VALUE logical_type);
|
|
12
|
+
static VALUE table_function_bind_info_set_cardinality(VALUE self, VALUE cardinality, VALUE is_exact);
|
|
13
|
+
static VALUE table_function_bind_info_set_error(VALUE self, VALUE error);
|
|
14
14
|
|
|
15
15
|
static const rb_data_type_t bind_info_data_type = {
|
|
16
16
|
"DuckDB/TableFunctionBindInfo",
|
|
@@ -32,7 +32,7 @@ static size_t memsize(const void *p) {
|
|
|
32
32
|
return sizeof(rubyDuckDBBindInfo);
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
rubyDuckDBBindInfo *
|
|
35
|
+
rubyDuckDBBindInfo *rbduckdb_get_struct_bind_info(VALUE obj) {
|
|
36
36
|
rubyDuckDBBindInfo *ctx;
|
|
37
37
|
TypedData_Get_Struct(obj, rubyDuckDBBindInfo, &bind_info_data_type, ctx);
|
|
38
38
|
return ctx;
|
|
@@ -52,7 +52,7 @@ rubyDuckDBBindInfo *get_struct_bind_info(VALUE obj) {
|
|
|
52
52
|
*
|
|
53
53
|
* bind_info.parameter_count # => 2
|
|
54
54
|
*/
|
|
55
|
-
static VALUE
|
|
55
|
+
static VALUE table_function_bind_info_parameter_count(VALUE self) {
|
|
56
56
|
rubyDuckDBBindInfo *ctx;
|
|
57
57
|
idx_t count;
|
|
58
58
|
|
|
@@ -71,7 +71,7 @@ static VALUE rbduckdb_bind_info_parameter_count(VALUE self) {
|
|
|
71
71
|
*
|
|
72
72
|
* param = bind_info.get_parameter(0)
|
|
73
73
|
*/
|
|
74
|
-
static VALUE
|
|
74
|
+
static VALUE table_function_bind_info_get_parameter(VALUE self, VALUE index) {
|
|
75
75
|
rubyDuckDBBindInfo *ctx;
|
|
76
76
|
idx_t idx;
|
|
77
77
|
duckdb_value param_value;
|
|
@@ -97,7 +97,7 @@ static VALUE rbduckdb_bind_info_get_parameter(VALUE self, VALUE index) {
|
|
|
97
97
|
*
|
|
98
98
|
* param = bind_info.get_named_parameter('limit')
|
|
99
99
|
*/
|
|
100
|
-
static VALUE
|
|
100
|
+
static VALUE table_function_bind_info_get_named_parameter(VALUE self, VALUE name) {
|
|
101
101
|
rubyDuckDBBindInfo *ctx;
|
|
102
102
|
const char *param_name;
|
|
103
103
|
duckdb_value param_value;
|
|
@@ -129,7 +129,7 @@ static VALUE rbduckdb_bind_info_get_named_parameter(VALUE self, VALUE name) {
|
|
|
129
129
|
* bind_info.add_result_column('id', DuckDB::LogicalType::BIGINT)
|
|
130
130
|
* bind_info.add_result_column('name', DuckDB::LogicalType::VARCHAR)
|
|
131
131
|
*/
|
|
132
|
-
static VALUE
|
|
132
|
+
static VALUE table_function_bind_info__add_result_column(VALUE self, VALUE column_name, VALUE logical_type) {
|
|
133
133
|
rubyDuckDBBindInfo *ctx;
|
|
134
134
|
rubyDuckDBLogicalType *ctx_logical_type;
|
|
135
135
|
const char *col_name;
|
|
@@ -152,7 +152,7 @@ static VALUE rbduckdb_bind_info__add_result_column(VALUE self, VALUE column_name
|
|
|
152
152
|
* bind_info.set_cardinality(100, true) # Exactly 100 rows
|
|
153
153
|
* bind_info.set_cardinality(1000, false) # Approximately 1000 rows
|
|
154
154
|
*/
|
|
155
|
-
static VALUE
|
|
155
|
+
static VALUE table_function_bind_info_set_cardinality(VALUE self, VALUE cardinality, VALUE is_exact) {
|
|
156
156
|
rubyDuckDBBindInfo *ctx;
|
|
157
157
|
idx_t card;
|
|
158
158
|
bool exact;
|
|
@@ -175,7 +175,7 @@ static VALUE rbduckdb_bind_info_set_cardinality(VALUE self, VALUE cardinality, V
|
|
|
175
175
|
*
|
|
176
176
|
* bind_info.set_error('Invalid parameter value')
|
|
177
177
|
*/
|
|
178
|
-
static VALUE
|
|
178
|
+
static VALUE table_function_bind_info_set_error(VALUE self, VALUE error) {
|
|
179
179
|
rubyDuckDBBindInfo *ctx;
|
|
180
180
|
const char *error_msg;
|
|
181
181
|
|
|
@@ -187,18 +187,18 @@ static VALUE rbduckdb_bind_info_set_error(VALUE self, VALUE error) {
|
|
|
187
187
|
return self;
|
|
188
188
|
}
|
|
189
189
|
|
|
190
|
-
void
|
|
190
|
+
void rbduckdb_init_table_function_bind_info(void) {
|
|
191
191
|
#if 0
|
|
192
192
|
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
193
193
|
#endif
|
|
194
194
|
cDuckDBTableFunctionBindInfo = rb_define_class_under(cDuckDBTableFunction, "BindInfo", rb_cObject);
|
|
195
195
|
rb_define_alloc_func(cDuckDBTableFunctionBindInfo, allocate);
|
|
196
196
|
|
|
197
|
-
rb_define_method(cDuckDBTableFunctionBindInfo, "parameter_count",
|
|
198
|
-
rb_define_method(cDuckDBTableFunctionBindInfo, "get_parameter",
|
|
199
|
-
rb_define_method(cDuckDBTableFunctionBindInfo, "get_named_parameter",
|
|
200
|
-
rb_define_method(cDuckDBTableFunctionBindInfo, "set_cardinality",
|
|
201
|
-
rb_define_method(cDuckDBTableFunctionBindInfo, "set_error",
|
|
197
|
+
rb_define_method(cDuckDBTableFunctionBindInfo, "parameter_count", table_function_bind_info_parameter_count, 0);
|
|
198
|
+
rb_define_method(cDuckDBTableFunctionBindInfo, "get_parameter", table_function_bind_info_get_parameter, 1);
|
|
199
|
+
rb_define_method(cDuckDBTableFunctionBindInfo, "get_named_parameter", table_function_bind_info_get_named_parameter, 1);
|
|
200
|
+
rb_define_method(cDuckDBTableFunctionBindInfo, "set_cardinality", table_function_bind_info_set_cardinality, 2);
|
|
201
|
+
rb_define_method(cDuckDBTableFunctionBindInfo, "set_error", table_function_bind_info_set_error, 1);
|
|
202
202
|
|
|
203
|
-
rb_define_private_method(cDuckDBTableFunctionBindInfo, "_add_result_column",
|
|
203
|
+
rb_define_private_method(cDuckDBTableFunctionBindInfo, "_add_result_column", table_function_bind_info__add_result_column, 2);
|
|
204
204
|
}
|
|
@@ -8,7 +8,7 @@ struct _rubyDuckDBBindInfo {
|
|
|
8
8
|
typedef struct _rubyDuckDBBindInfo rubyDuckDBBindInfo;
|
|
9
9
|
|
|
10
10
|
extern VALUE cDuckDBTableFunctionBindInfo;
|
|
11
|
-
rubyDuckDBBindInfo *
|
|
12
|
-
void
|
|
11
|
+
rubyDuckDBBindInfo *rbduckdb_get_struct_bind_info(VALUE obj);
|
|
12
|
+
void rbduckdb_init_table_function_bind_info(void);
|
|
13
13
|
|
|
14
14
|
#endif
|
|
@@ -5,7 +5,7 @@ VALUE cDuckDBTableFunctionFunctionInfo;
|
|
|
5
5
|
static void deallocate(void *ctx);
|
|
6
6
|
static VALUE allocate(VALUE klass);
|
|
7
7
|
static size_t memsize(const void *p);
|
|
8
|
-
static VALUE
|
|
8
|
+
static VALUE table_function_function_info_set_error(VALUE self, VALUE error);
|
|
9
9
|
|
|
10
10
|
static const rb_data_type_t function_info_data_type = {
|
|
11
11
|
"DuckDB/TableFunctionFunctionInfo",
|
|
@@ -27,7 +27,7 @@ static size_t memsize(const void *p) {
|
|
|
27
27
|
return sizeof(rubyDuckDBFunctionInfo);
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
rubyDuckDBFunctionInfo *
|
|
30
|
+
rubyDuckDBFunctionInfo *rbduckdb_get_struct_function_info(VALUE obj) {
|
|
31
31
|
rubyDuckDBFunctionInfo *ctx;
|
|
32
32
|
TypedData_Get_Struct(obj, rubyDuckDBFunctionInfo, &function_info_data_type, ctx);
|
|
33
33
|
return ctx;
|
|
@@ -42,7 +42,7 @@ rubyDuckDBFunctionInfo *get_struct_function_info(VALUE obj) {
|
|
|
42
42
|
*
|
|
43
43
|
* function_info.set_error('Invalid parameter value')
|
|
44
44
|
*/
|
|
45
|
-
static VALUE
|
|
45
|
+
static VALUE table_function_function_info_set_error(VALUE self, VALUE error) {
|
|
46
46
|
rubyDuckDBFunctionInfo *ctx;
|
|
47
47
|
const char *error_msg;
|
|
48
48
|
|
|
@@ -54,12 +54,12 @@ static VALUE rbduckdb_function_info_set_error(VALUE self, VALUE error) {
|
|
|
54
54
|
return self;
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
void
|
|
57
|
+
void rbduckdb_init_table_function_function_info(void) {
|
|
58
58
|
#if 0
|
|
59
59
|
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
60
60
|
#endif
|
|
61
61
|
cDuckDBTableFunctionFunctionInfo = rb_define_class_under(cDuckDBTableFunction, "FunctionInfo", rb_cObject);
|
|
62
62
|
rb_define_alloc_func(cDuckDBTableFunctionFunctionInfo, allocate);
|
|
63
63
|
|
|
64
|
-
rb_define_method(cDuckDBTableFunctionFunctionInfo, "set_error",
|
|
64
|
+
rb_define_method(cDuckDBTableFunctionFunctionInfo, "set_error", table_function_function_info_set_error, 1);
|
|
65
65
|
}
|
|
@@ -7,7 +7,7 @@ struct _rubyDuckDBFunctionInfo {
|
|
|
7
7
|
|
|
8
8
|
typedef struct _rubyDuckDBFunctionInfo rubyDuckDBFunctionInfo;
|
|
9
9
|
|
|
10
|
-
rubyDuckDBFunctionInfo *
|
|
11
|
-
void
|
|
10
|
+
rubyDuckDBFunctionInfo *rbduckdb_get_struct_function_info(VALUE obj);
|
|
11
|
+
void rbduckdb_init_table_function_function_info(void);
|
|
12
12
|
|
|
13
13
|
#endif
|
|
@@ -5,7 +5,10 @@ VALUE cDuckDBTableFunctionInitInfo;
|
|
|
5
5
|
static void deallocate(void *ctx);
|
|
6
6
|
static VALUE allocate(VALUE klass);
|
|
7
7
|
static size_t memsize(const void *p);
|
|
8
|
-
static VALUE
|
|
8
|
+
static VALUE table_function_init_info_set_error(VALUE self, VALUE error);
|
|
9
|
+
static VALUE table_function_init_info_set_max_threads(VALUE self, VALUE max_threads);
|
|
10
|
+
static VALUE table_function_init_info_column_count(VALUE self);
|
|
11
|
+
static VALUE table_function_init_info_column_index(VALUE self, VALUE index);
|
|
9
12
|
|
|
10
13
|
static const rb_data_type_t init_info_data_type = {
|
|
11
14
|
"DuckDB/TableFunctionInitInfo",
|
|
@@ -27,7 +30,7 @@ static size_t memsize(const void *p) {
|
|
|
27
30
|
return sizeof(rubyDuckDBInitInfo);
|
|
28
31
|
}
|
|
29
32
|
|
|
30
|
-
rubyDuckDBInitInfo *
|
|
33
|
+
rubyDuckDBInitInfo *rbduckdb_get_struct_init_info(VALUE obj) {
|
|
31
34
|
rubyDuckDBInitInfo *ctx;
|
|
32
35
|
TypedData_Get_Struct(obj, rubyDuckDBInitInfo, &init_info_data_type, ctx);
|
|
33
36
|
return ctx;
|
|
@@ -42,7 +45,7 @@ rubyDuckDBInitInfo *get_struct_init_info(VALUE obj) {
|
|
|
42
45
|
*
|
|
43
46
|
* init_info.set_error('Invalid initialization')
|
|
44
47
|
*/
|
|
45
|
-
static VALUE
|
|
48
|
+
static VALUE table_function_init_info_set_error(VALUE self, VALUE error) {
|
|
46
49
|
rubyDuckDBInitInfo *ctx;
|
|
47
50
|
const char *error_msg;
|
|
48
51
|
|
|
@@ -54,12 +57,74 @@ static VALUE rbduckdb_init_info_set_error(VALUE self, VALUE error) {
|
|
|
54
57
|
return self;
|
|
55
58
|
}
|
|
56
59
|
|
|
57
|
-
|
|
60
|
+
/*
|
|
61
|
+
* call-seq:
|
|
62
|
+
* init_info.set_max_threads(max_threads) -> self
|
|
63
|
+
* init_info.max_threads = max_threads
|
|
64
|
+
*
|
|
65
|
+
* Sets the maximum number of threads that can execute the table function concurrently.
|
|
66
|
+
* This is a hint to DuckDB's scheduler; the actual number of threads is also bounded
|
|
67
|
+
* by the configured worker pool size (e.g., +SET threads+).
|
|
68
|
+
*
|
|
69
|
+
* init_info.max_threads = 4
|
|
70
|
+
*/
|
|
71
|
+
static VALUE table_function_init_info_set_max_threads(VALUE self, VALUE max_threads) {
|
|
72
|
+
rubyDuckDBInitInfo *ctx;
|
|
73
|
+
|
|
74
|
+
TypedData_Get_Struct(self, rubyDuckDBInitInfo, &init_info_data_type, ctx);
|
|
75
|
+
|
|
76
|
+
duckdb_init_set_max_threads(ctx->info, NUM2ULL(max_threads));
|
|
77
|
+
|
|
78
|
+
return self;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/*
|
|
82
|
+
* call-seq:
|
|
83
|
+
* init_info.column_count -> Integer
|
|
84
|
+
*
|
|
85
|
+
* Returns the number of projected result columns for this scan.
|
|
86
|
+
* Without projection pushdown this equals the number of result columns
|
|
87
|
+
* added in the bind callback.
|
|
88
|
+
*
|
|
89
|
+
* init_info.column_count # => 2
|
|
90
|
+
*/
|
|
91
|
+
static VALUE table_function_init_info_column_count(VALUE self) {
|
|
92
|
+
rubyDuckDBInitInfo *ctx;
|
|
93
|
+
|
|
94
|
+
TypedData_Get_Struct(self, rubyDuckDBInitInfo, &init_info_data_type, ctx);
|
|
95
|
+
|
|
96
|
+
return ULL2NUM(duckdb_init_get_column_count(ctx->info));
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/*
|
|
100
|
+
* call-seq:
|
|
101
|
+
* init_info.column_index(index) -> Integer
|
|
102
|
+
*
|
|
103
|
+
* Returns the column index of the projected result column at +index+
|
|
104
|
+
* (0 <= +index+ < column_count). Without projection pushdown the projected
|
|
105
|
+
* columns mirror the columns added in the bind callback, so this returns
|
|
106
|
+
* +index+ itself.
|
|
107
|
+
*
|
|
108
|
+
* init_info.column_index(0) # => 0
|
|
109
|
+
*/
|
|
110
|
+
static VALUE table_function_init_info_column_index(VALUE self, VALUE index) {
|
|
111
|
+
rubyDuckDBInitInfo *ctx;
|
|
112
|
+
|
|
113
|
+
TypedData_Get_Struct(self, rubyDuckDBInitInfo, &init_info_data_type, ctx);
|
|
114
|
+
|
|
115
|
+
return ULL2NUM(duckdb_init_get_column_index(ctx->info, NUM2ULL(index)));
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
void rbduckdb_init_table_function_init_info(void) {
|
|
58
119
|
#if 0
|
|
59
120
|
VALUE mDuckDB = rb_define_module("DuckDB");
|
|
60
121
|
#endif
|
|
61
122
|
cDuckDBTableFunctionInitInfo = rb_define_class_under(cDuckDBTableFunction, "InitInfo", rb_cObject);
|
|
62
123
|
rb_define_alloc_func(cDuckDBTableFunctionInitInfo, allocate);
|
|
63
124
|
|
|
64
|
-
rb_define_method(cDuckDBTableFunctionInitInfo, "set_error",
|
|
125
|
+
rb_define_method(cDuckDBTableFunctionInitInfo, "set_error", table_function_init_info_set_error, 1);
|
|
126
|
+
rb_define_method(cDuckDBTableFunctionInitInfo, "set_max_threads", table_function_init_info_set_max_threads, 1);
|
|
127
|
+
rb_define_method(cDuckDBTableFunctionInitInfo, "max_threads=", table_function_init_info_set_max_threads, 1);
|
|
128
|
+
rb_define_method(cDuckDBTableFunctionInitInfo, "column_count", table_function_init_info_column_count, 0);
|
|
129
|
+
rb_define_method(cDuckDBTableFunctionInitInfo, "column_index", table_function_init_info_column_index, 1);
|
|
65
130
|
}
|
|
@@ -8,7 +8,7 @@ struct _rubyDuckDBInitInfo {
|
|
|
8
8
|
typedef struct _rubyDuckDBInitInfo rubyDuckDBInitInfo;
|
|
9
9
|
|
|
10
10
|
extern VALUE cDuckDBTableFunctionInitInfo;
|
|
11
|
-
rubyDuckDBInitInfo *
|
|
12
|
-
void
|
|
11
|
+
rubyDuckDBInitInfo *rbduckdb_get_struct_init_info(VALUE obj);
|
|
12
|
+
void rbduckdb_init_table_function_init_info(void);
|
|
13
13
|
|
|
14
14
|
#endif
|
data/lib/duckdb/appender.rb
CHANGED
|
@@ -558,6 +558,29 @@ module DuckDB
|
|
|
558
558
|
raise_appender_error('failed to append_uhugeint')
|
|
559
559
|
end
|
|
560
560
|
|
|
561
|
+
# :call-seq:
|
|
562
|
+
# appender.append_uuid(val) -> self
|
|
563
|
+
#
|
|
564
|
+
# Appends a UUID value to the current row in the appender.
|
|
565
|
+
# +val+ must be a String in canonical UUID format
|
|
566
|
+
# (<tt>xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx</tt>).
|
|
567
|
+
# Raises ArgumentError if +val+ is not a valid UUID string.
|
|
568
|
+
#
|
|
569
|
+
# require 'duckdb'
|
|
570
|
+
# db = DuckDB::Database.open
|
|
571
|
+
# con = db.connect
|
|
572
|
+
# con.query('CREATE TABLE uuids (id UUID)')
|
|
573
|
+
# appender = con.appender('uuids')
|
|
574
|
+
# appender
|
|
575
|
+
# .append_uuid('550e8400-e29b-41d4-a716-446655440000')
|
|
576
|
+
# .end_row
|
|
577
|
+
# .flush
|
|
578
|
+
def append_uuid(value)
|
|
579
|
+
return self if _append_uuid(value)
|
|
580
|
+
|
|
581
|
+
raise_appender_error('failed to append_uuid')
|
|
582
|
+
end
|
|
583
|
+
|
|
561
584
|
# call-seq:
|
|
562
585
|
# appender.append_date(val) -> self
|
|
563
586
|
#
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DuckDB
|
|
4
|
+
# The ArrowArrayStream class represents an exported Arrow C stream of a
|
|
5
|
+
# query result (Arrow C Data Interface). It is created by
|
|
6
|
+
# DuckDB::Result#arrow_c_stream and cannot be instantiated directly.
|
|
7
|
+
#
|
|
8
|
+
# The object satisfies the Ruby Arrow C stream protocol: #arrow_c_stream
|
|
9
|
+
# returns self and #to_i returns the address of the underlying
|
|
10
|
+
# <tt>struct ArrowArrayStream</tt>, so it can be consumed by ruby-polars,
|
|
11
|
+
# red-arrow and other Arrow consumers:
|
|
12
|
+
#
|
|
13
|
+
# result = con.query('SELECT * FROM users')
|
|
14
|
+
#
|
|
15
|
+
# # ruby-polars
|
|
16
|
+
# df = Polars::DataFrame.new(result)
|
|
17
|
+
#
|
|
18
|
+
# # red-arrow
|
|
19
|
+
# reader = Arrow::RecordBatchReader.import(result.arrow_c_stream.to_i)
|
|
20
|
+
#
|
|
21
|
+
# The consumer takes ownership of the stream's contents; a result can be
|
|
22
|
+
# exported only once.
|
|
23
|
+
#
|
|
24
|
+
# [EXPERIMENTAL] This API is built on DuckDB's unstable Arrow C API and
|
|
25
|
+
# may change in any minor release.
|
|
26
|
+
class ArrowArrayStream
|
|
27
|
+
class << self
|
|
28
|
+
def new
|
|
29
|
+
raise DuckDB::Error, 'DuckDB::ArrowArrayStream cannot be instantiated directly. Use DuckDB::Result#arrow_c_stream.'
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
data/lib/duckdb/connection.rb
CHANGED
|
@@ -383,8 +383,62 @@ module DuckDB
|
|
|
383
383
|
register_table_function(tf)
|
|
384
384
|
end
|
|
385
385
|
|
|
386
|
+
# [EXPERIMENTAL] Appends an Arrow producer into an existing table.
|
|
387
|
+
#
|
|
388
|
+
# Reads +producer+ (any object responding to +#arrow_c_stream+, such as a
|
|
389
|
+
# ruby-polars +DataFrame+ or a +DuckDB::Result+) as an Arrow C stream and
|
|
390
|
+
# appends its chunks into the existing table +table+. The producer's Arrow
|
|
391
|
+
# columns must line up with the table's columns positionally and by count.
|
|
392
|
+
# DuckDB casts compatible column types (e.g. INTEGER into a BIGINT column);
|
|
393
|
+
# a type that cannot be cast (e.g. a non-numeric VARCHAR into an INTEGER
|
|
394
|
+
# column) or a column-count mismatch raises +DuckDB::Error+.
|
|
395
|
+
#
|
|
396
|
+
# This is not transactional: a schema mismatch fails before any rows are
|
|
397
|
+
# written, but a rarer mid-stream failure can leave earlier chunks
|
|
398
|
+
# appended. Wrap the call in your own transaction for all-or-nothing.
|
|
399
|
+
#
|
|
400
|
+
# This API is built on DuckDB's unstable Arrow C API and may change in any
|
|
401
|
+
# minor release.
|
|
402
|
+
#
|
|
403
|
+
# @param table [String] the name of the existing target table
|
|
404
|
+
# @param producer [#arrow_c_stream] the Arrow producer
|
|
405
|
+
# @raise [TypeError] if +producer+ does not respond to +#arrow_c_stream+
|
|
406
|
+
# @return [Integer] the number of rows appended
|
|
407
|
+
#
|
|
408
|
+
# @example Load a Polars DataFrame into a table
|
|
409
|
+
# con.query('CREATE TABLE t (id INTEGER, name VARCHAR)')
|
|
410
|
+
# con.append_arrow('t', polars_df)
|
|
411
|
+
#
|
|
412
|
+
def append_arrow(table, producer)
|
|
413
|
+
unless producer.respond_to?(:arrow_c_stream)
|
|
414
|
+
raise TypeError, "Arrow producer must respond to #arrow_c_stream, got #{producer.class}"
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
stream = producer.arrow_c_stream # keep the producer's stream alive for the duration
|
|
418
|
+
address = stream.to_i
|
|
419
|
+
begin
|
|
420
|
+
append_arrow_chunks(table, address)
|
|
421
|
+
ensure
|
|
422
|
+
_arrow_release(address)
|
|
423
|
+
end
|
|
424
|
+
end
|
|
425
|
+
|
|
386
426
|
private
|
|
387
427
|
|
|
428
|
+
# Drives the Arrow stream at +address+ chunk by chunk into +table+,
|
|
429
|
+
# returning the number of rows appended.
|
|
430
|
+
def append_arrow_chunks(table, address)
|
|
431
|
+
converted_schema = _arrow_converted_schema(address)
|
|
432
|
+
rows = 0
|
|
433
|
+
appender(table) do |app|
|
|
434
|
+
while (chunk = _arrow_next_chunk(address, converted_schema))
|
|
435
|
+
rows += chunk.size
|
|
436
|
+
app.append_data_chunk(chunk)
|
|
437
|
+
end
|
|
438
|
+
end
|
|
439
|
+
rows
|
|
440
|
+
end
|
|
441
|
+
|
|
388
442
|
def run_appender_block(appender, &)
|
|
389
443
|
return appender unless block_given?
|
|
390
444
|
|