duckdb 1.5.2.1 → 1.5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/README.md +52 -0
- data/ext/duckdb/aggregate_function.c +1 -1
- data/ext/duckdb/aggregate_function_set.c +86 -0
- data/ext/duckdb/aggregate_function_set.h +14 -0
- data/ext/duckdb/appender.c +62 -4
- data/ext/duckdb/arrow_array_stream.c +226 -0
- data/ext/duckdb/arrow_array_stream.h +61 -0
- data/ext/duckdb/arrow_import.c +165 -0
- data/ext/duckdb/arrow_import.h +6 -0
- data/ext/duckdb/blob.c +1 -1
- data/ext/duckdb/blob.h +1 -2
- data/ext/duckdb/config.c +1 -1
- data/ext/duckdb/config.h +1 -1
- data/ext/duckdb/connection.c +26 -3
- data/ext/duckdb/converter.h +1 -0
- data/ext/duckdb/conveter.c +39 -9
- data/ext/duckdb/data_chunk.c +10 -0
- data/ext/duckdb/data_chunk.h +1 -0
- data/ext/duckdb/duckdb.c +14 -11
- data/ext/duckdb/error.c +1 -1
- data/ext/duckdb/error.h +1 -3
- data/ext/duckdb/extconf.rb +28 -13
- data/ext/duckdb/function_executor.c +308 -2
- data/ext/duckdb/function_executor.h +44 -0
- data/ext/duckdb/prepared_statement.c +38 -0
- data/ext/duckdb/result.c +49 -53
- data/ext/duckdb/result.h +11 -0
- data/ext/duckdb/ruby-duckdb.h +4 -0
- data/ext/duckdb/scalar_function.c +97 -29
- data/ext/duckdb/scalar_function.h +2 -4
- data/ext/duckdb/scalar_function_bind_info.c +13 -13
- data/ext/duckdb/scalar_function_bind_info.h +1 -1
- data/ext/duckdb/scalar_function_set.c +9 -9
- data/ext/duckdb/scalar_function_set.h +2 -2
- data/ext/duckdb/table_description.c +19 -19
- data/ext/duckdb/table_description.h +1 -1
- data/ext/duckdb/table_function.c +94 -28
- data/ext/duckdb/table_function.h +2 -2
- data/ext/duckdb/table_function_bind_info.c +20 -20
- data/ext/duckdb/table_function_bind_info.h +2 -2
- data/ext/duckdb/table_function_function_info.c +5 -5
- data/ext/duckdb/table_function_function_info.h +2 -2
- data/ext/duckdb/table_function_init_info.c +70 -5
- data/ext/duckdb/table_function_init_info.h +2 -2
- data/lib/duckdb/aggregate_function.rb +7 -1
- data/lib/duckdb/aggregate_function_set.rb +29 -0
- data/lib/duckdb/appender.rb +97 -0
- data/lib/duckdb/arrow_array_stream.rb +33 -0
- data/lib/duckdb/connection.rb +139 -9
- data/lib/duckdb/prepared_statement.rb +35 -0
- data/lib/duckdb/result.rb +39 -2
- data/lib/duckdb/scalar_function.rb +9 -4
- data/lib/duckdb/scalar_function_set.rb +0 -1
- data/lib/duckdb/table_description.rb +7 -0
- data/lib/duckdb/table_name_parser.rb +58 -0
- data/lib/duckdb/version.rb +1 -1
- data/lib/duckdb.rb +3 -0
- metadata +11 -2
|
@@ -68,6 +68,14 @@ static struct callback_request *g_request_list = NULL;
|
|
|
68
68
|
static VALUE g_executor_thread = Qnil;
|
|
69
69
|
static int g_executor_started = 0;
|
|
70
70
|
|
|
71
|
+
/*
|
|
72
|
+
* GC-protection array holding every live per-worker proxy Ruby thread.
|
|
73
|
+
* Proxies are created from non-Ruby init hooks (via the global executor) and
|
|
74
|
+
* are not reachable from any marked object, so without this array the GC could
|
|
75
|
+
* collect a proxy thread while DuckDB still dispatches callbacks to it.
|
|
76
|
+
*/
|
|
77
|
+
static VALUE g_proxy_threads = Qnil;
|
|
78
|
+
|
|
71
79
|
/* Data passed to the executor wait function */
|
|
72
80
|
struct executor_wait_data {
|
|
73
81
|
struct callback_request *request;
|
|
@@ -166,6 +174,11 @@ void rbduckdb_function_executor_ensure_started(void) {
|
|
|
166
174
|
}
|
|
167
175
|
#endif
|
|
168
176
|
|
|
177
|
+
if (g_proxy_threads == Qnil) {
|
|
178
|
+
g_proxy_threads = rb_ary_new();
|
|
179
|
+
rb_global_variable(&g_proxy_threads);
|
|
180
|
+
}
|
|
181
|
+
|
|
169
182
|
g_executor_thread = rb_thread_create(executor_thread_func, NULL);
|
|
170
183
|
rb_global_variable(&g_executor_thread);
|
|
171
184
|
g_executor_started = 1;
|
|
@@ -242,7 +255,293 @@ static void *callback_with_gvl(void *data) {
|
|
|
242
255
|
return NULL;
|
|
243
256
|
}
|
|
244
257
|
|
|
245
|
-
|
|
258
|
+
/*
|
|
259
|
+
* ============================================================================
|
|
260
|
+
* Per-worker proxy thread
|
|
261
|
+
* ============================================================================
|
|
262
|
+
*
|
|
263
|
+
* One dedicated Ruby thread per DuckDB worker thread. Same hand-off protocol as
|
|
264
|
+
* the global executor (mutex + condvars), but private to a single worker so
|
|
265
|
+
* that callbacks from different workers no longer serialize through one queue.
|
|
266
|
+
*
|
|
267
|
+
* Pattern follows the FFI gem's async callback dispatcher:
|
|
268
|
+
* https://github.com/ffi/ffi/blob/master/ext/ffi_c/Function.c
|
|
269
|
+
*/
|
|
270
|
+
struct worker_proxy {
|
|
271
|
+
VALUE ruby_thread;
|
|
272
|
+
volatile int stop_requested;
|
|
273
|
+
rbduckdb_function_callback_t cb;
|
|
274
|
+
void *user_data;
|
|
275
|
+
volatile int has_request;
|
|
276
|
+
volatile int request_done;
|
|
277
|
+
volatile int thread_exited;
|
|
278
|
+
#ifdef _MSC_VER
|
|
279
|
+
CRITICAL_SECTION lock;
|
|
280
|
+
CONDITION_VARIABLE request_cond;
|
|
281
|
+
CONDITION_VARIABLE request_done_cond;
|
|
282
|
+
CONDITION_VARIABLE thread_exit_cond;
|
|
283
|
+
#else
|
|
284
|
+
pthread_mutex_t lock;
|
|
285
|
+
pthread_cond_t request_cond;
|
|
286
|
+
pthread_cond_t request_done_cond;
|
|
287
|
+
pthread_cond_t thread_exit_cond;
|
|
288
|
+
#endif
|
|
289
|
+
};
|
|
290
|
+
|
|
291
|
+
/* Runs without GVL: the proxy waits for a callback request */
|
|
292
|
+
static void *proxy_wait_func(void *data) {
|
|
293
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
294
|
+
|
|
295
|
+
#ifdef _MSC_VER
|
|
296
|
+
EnterCriticalSection(&proxy->lock);
|
|
297
|
+
while (!proxy->stop_requested && !proxy->has_request) {
|
|
298
|
+
SleepConditionVariableCS(&proxy->request_cond, &proxy->lock, INFINITE);
|
|
299
|
+
}
|
|
300
|
+
LeaveCriticalSection(&proxy->lock);
|
|
301
|
+
#else
|
|
302
|
+
pthread_mutex_lock(&proxy->lock);
|
|
303
|
+
while (!proxy->stop_requested && !proxy->has_request) {
|
|
304
|
+
pthread_cond_wait(&proxy->request_cond, &proxy->lock);
|
|
305
|
+
}
|
|
306
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
307
|
+
#endif
|
|
308
|
+
|
|
309
|
+
return NULL;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/* Unblock function for the proxy thread (VM shutdown or Thread#kill) */
|
|
313
|
+
static void proxy_stop_func(void *data) {
|
|
314
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
315
|
+
|
|
316
|
+
#ifdef _MSC_VER
|
|
317
|
+
EnterCriticalSection(&proxy->lock);
|
|
318
|
+
proxy->stop_requested = 1;
|
|
319
|
+
WakeConditionVariable(&proxy->request_cond);
|
|
320
|
+
LeaveCriticalSection(&proxy->lock);
|
|
321
|
+
#else
|
|
322
|
+
pthread_mutex_lock(&proxy->lock);
|
|
323
|
+
proxy->stop_requested = 1;
|
|
324
|
+
pthread_cond_signal(&proxy->request_cond);
|
|
325
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
326
|
+
#endif
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
/* The proxy thread main loop. Runs as the body of rb_ensure (see below). */
|
|
330
|
+
static VALUE proxy_loop_body(VALUE data) {
|
|
331
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
332
|
+
|
|
333
|
+
while (!proxy->stop_requested) {
|
|
334
|
+
/* Release the GVL and wait for a request */
|
|
335
|
+
rb_thread_call_without_gvl(proxy_wait_func, proxy, proxy_stop_func, proxy);
|
|
336
|
+
|
|
337
|
+
if (proxy->stop_requested) break;
|
|
338
|
+
|
|
339
|
+
if (proxy->has_request) {
|
|
340
|
+
/* Execute the callback with the GVL held */
|
|
341
|
+
proxy->cb(proxy->user_data);
|
|
342
|
+
|
|
343
|
+
/* Signal completion to the DuckDB worker thread */
|
|
344
|
+
#ifdef _MSC_VER
|
|
345
|
+
EnterCriticalSection(&proxy->lock);
|
|
346
|
+
proxy->has_request = 0;
|
|
347
|
+
proxy->request_done = 1;
|
|
348
|
+
WakeConditionVariable(&proxy->request_done_cond);
|
|
349
|
+
LeaveCriticalSection(&proxy->lock);
|
|
350
|
+
#else
|
|
351
|
+
pthread_mutex_lock(&proxy->lock);
|
|
352
|
+
proxy->has_request = 0;
|
|
353
|
+
proxy->request_done = 1;
|
|
354
|
+
pthread_cond_signal(&proxy->request_done_cond);
|
|
355
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
356
|
+
#endif
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
return Qnil;
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
/*
|
|
364
|
+
* Teardown for the proxy thread. Run via rb_ensure so it executes even if an
|
|
365
|
+
* async exception (Thread#kill, VM shutdown via rb_thread_terminate_all)
|
|
366
|
+
* unwinds proxy_loop_body. If it were skipped, thread_exited would stay 0
|
|
367
|
+
* forever and rbduckdb_worker_proxy_destroy's join would deadlock.
|
|
368
|
+
*/
|
|
369
|
+
static VALUE proxy_cleanup(VALUE data) {
|
|
370
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
371
|
+
|
|
372
|
+
/* Stop being GC-protected now that we are about to exit */
|
|
373
|
+
if (g_proxy_threads != Qnil) {
|
|
374
|
+
rb_ary_delete(g_proxy_threads, proxy->ruby_thread);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
/*
|
|
378
|
+
* Signal that this thread has finished and no longer touches the proxy
|
|
379
|
+
* struct. Only after this may rbduckdb_worker_proxy_destroy free it.
|
|
380
|
+
*/
|
|
381
|
+
#ifdef _MSC_VER
|
|
382
|
+
EnterCriticalSection(&proxy->lock);
|
|
383
|
+
proxy->thread_exited = 1;
|
|
384
|
+
WakeConditionVariable(&proxy->thread_exit_cond);
|
|
385
|
+
LeaveCriticalSection(&proxy->lock);
|
|
386
|
+
#else
|
|
387
|
+
pthread_mutex_lock(&proxy->lock);
|
|
388
|
+
proxy->thread_exited = 1;
|
|
389
|
+
pthread_cond_signal(&proxy->thread_exit_cond);
|
|
390
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
391
|
+
#endif
|
|
392
|
+
|
|
393
|
+
return Qnil;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
/* The proxy thread entry point (Ruby thread). */
|
|
397
|
+
static VALUE proxy_thread_func(void *data) {
|
|
398
|
+
return rb_ensure(proxy_loop_body, (VALUE)data, proxy_cleanup, (VALUE)data);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
struct worker_proxy *rbduckdb_worker_proxy_create(void) {
|
|
402
|
+
/*
|
|
403
|
+
* Use calloc (not xcalloc): rbduckdb_worker_proxy_destroy frees the struct
|
|
404
|
+
* from a non-Ruby thread where xfree is unsafe.
|
|
405
|
+
*/
|
|
406
|
+
struct worker_proxy *proxy = calloc(1, sizeof(struct worker_proxy));
|
|
407
|
+
if (proxy == NULL) {
|
|
408
|
+
rb_raise(rb_eNoMemError, "failed to allocate worker_proxy");
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
proxy->stop_requested = 0;
|
|
412
|
+
proxy->has_request = 0;
|
|
413
|
+
proxy->request_done = 0;
|
|
414
|
+
proxy->thread_exited = 0;
|
|
415
|
+
|
|
416
|
+
#ifdef _MSC_VER
|
|
417
|
+
InitializeCriticalSection(&proxy->lock);
|
|
418
|
+
InitializeConditionVariable(&proxy->request_cond);
|
|
419
|
+
InitializeConditionVariable(&proxy->request_done_cond);
|
|
420
|
+
InitializeConditionVariable(&proxy->thread_exit_cond);
|
|
421
|
+
#else
|
|
422
|
+
pthread_mutex_init(&proxy->lock, NULL);
|
|
423
|
+
pthread_cond_init(&proxy->request_cond, NULL);
|
|
424
|
+
pthread_cond_init(&proxy->request_done_cond, NULL);
|
|
425
|
+
pthread_cond_init(&proxy->thread_exit_cond, NULL);
|
|
426
|
+
#endif
|
|
427
|
+
|
|
428
|
+
/*
|
|
429
|
+
* Lazy-init the GC-protection array so create never silently skips it (see
|
|
430
|
+
* the g_proxy_threads comment above); create runs with the GVL, so safe.
|
|
431
|
+
*/
|
|
432
|
+
if (g_proxy_threads == Qnil) {
|
|
433
|
+
g_proxy_threads = rb_ary_new();
|
|
434
|
+
rb_global_variable(&g_proxy_threads);
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
proxy->ruby_thread = rb_thread_create(proxy_thread_func, proxy);
|
|
438
|
+
rb_ary_push(g_proxy_threads, proxy->ruby_thread);
|
|
439
|
+
|
|
440
|
+
return proxy;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
/*
|
|
444
|
+
* Hand a callback to a proxy and block until it completes.
|
|
445
|
+
* Called from the DuckDB worker thread (non-Ruby thread) that owns this proxy.
|
|
446
|
+
*/
|
|
447
|
+
static void dispatch_callback_to_proxy(struct worker_proxy *proxy, rbduckdb_function_callback_t cb, void *user_data) {
|
|
448
|
+
#ifdef _MSC_VER
|
|
449
|
+
EnterCriticalSection(&proxy->lock);
|
|
450
|
+
proxy->cb = cb;
|
|
451
|
+
proxy->user_data = user_data;
|
|
452
|
+
proxy->request_done = 0;
|
|
453
|
+
proxy->has_request = 1;
|
|
454
|
+
WakeConditionVariable(&proxy->request_cond);
|
|
455
|
+
LeaveCriticalSection(&proxy->lock);
|
|
456
|
+
|
|
457
|
+
EnterCriticalSection(&proxy->lock);
|
|
458
|
+
while (!proxy->request_done) {
|
|
459
|
+
SleepConditionVariableCS(&proxy->request_done_cond, &proxy->lock, INFINITE);
|
|
460
|
+
}
|
|
461
|
+
LeaveCriticalSection(&proxy->lock);
|
|
462
|
+
#else
|
|
463
|
+
pthread_mutex_lock(&proxy->lock);
|
|
464
|
+
proxy->cb = cb;
|
|
465
|
+
proxy->user_data = user_data;
|
|
466
|
+
proxy->request_done = 0;
|
|
467
|
+
proxy->has_request = 1;
|
|
468
|
+
pthread_cond_signal(&proxy->request_cond);
|
|
469
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
470
|
+
|
|
471
|
+
pthread_mutex_lock(&proxy->lock);
|
|
472
|
+
while (!proxy->request_done) {
|
|
473
|
+
pthread_cond_wait(&proxy->request_done_cond, &proxy->lock);
|
|
474
|
+
}
|
|
475
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
476
|
+
#endif
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
/* Blocks until the proxy thread has fully exited. Runs without the GVL. */
|
|
480
|
+
static void *proxy_join_func(void *data) {
|
|
481
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
482
|
+
|
|
483
|
+
#ifdef _MSC_VER
|
|
484
|
+
EnterCriticalSection(&proxy->lock);
|
|
485
|
+
while (!proxy->thread_exited) {
|
|
486
|
+
SleepConditionVariableCS(&proxy->thread_exit_cond, &proxy->lock, INFINITE);
|
|
487
|
+
}
|
|
488
|
+
LeaveCriticalSection(&proxy->lock);
|
|
489
|
+
#else
|
|
490
|
+
pthread_mutex_lock(&proxy->lock);
|
|
491
|
+
while (!proxy->thread_exited) {
|
|
492
|
+
pthread_cond_wait(&proxy->thread_exit_cond, &proxy->lock);
|
|
493
|
+
}
|
|
494
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
495
|
+
#endif
|
|
496
|
+
|
|
497
|
+
return NULL;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
void rbduckdb_worker_proxy_destroy(void *data) {
|
|
501
|
+
struct worker_proxy *proxy = (struct worker_proxy *)data;
|
|
502
|
+
if (proxy == NULL) return;
|
|
503
|
+
|
|
504
|
+
/* Ask the proxy thread to stop. */
|
|
505
|
+
#ifdef _MSC_VER
|
|
506
|
+
EnterCriticalSection(&proxy->lock);
|
|
507
|
+
proxy->stop_requested = 1;
|
|
508
|
+
WakeConditionVariable(&proxy->request_cond);
|
|
509
|
+
LeaveCriticalSection(&proxy->lock);
|
|
510
|
+
#else
|
|
511
|
+
pthread_mutex_lock(&proxy->lock);
|
|
512
|
+
proxy->stop_requested = 1;
|
|
513
|
+
pthread_cond_signal(&proxy->request_cond);
|
|
514
|
+
pthread_mutex_unlock(&proxy->lock);
|
|
515
|
+
#endif
|
|
516
|
+
|
|
517
|
+
/*
|
|
518
|
+
* Wait until the proxy thread has fully exited. Before exiting it runs Ruby
|
|
519
|
+
* code (removing itself from the GC-protection array), which needs the GVL.
|
|
520
|
+
* DuckDB may invoke this destructor either from a worker thread (no GVL) or
|
|
521
|
+
* — depending on when it tears down the local state — from a Ruby thread
|
|
522
|
+
* that holds the GVL. In the latter case we must release the GVL while
|
|
523
|
+
* waiting, or the proxy thread could never acquire it and we would deadlock.
|
|
524
|
+
*/
|
|
525
|
+
if (ruby_native_thread_p() && ruby_thread_has_gvl_p()) {
|
|
526
|
+
rb_thread_call_without_gvl(proxy_join_func, proxy, NULL, NULL);
|
|
527
|
+
} else {
|
|
528
|
+
proxy_join_func(proxy);
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/* The proxy thread is gone; tear down OS primitives and free the struct. */
|
|
532
|
+
#ifdef _MSC_VER
|
|
533
|
+
DeleteCriticalSection(&proxy->lock);
|
|
534
|
+
#else
|
|
535
|
+
pthread_cond_destroy(&proxy->thread_exit_cond);
|
|
536
|
+
pthread_cond_destroy(&proxy->request_done_cond);
|
|
537
|
+
pthread_cond_destroy(&proxy->request_cond);
|
|
538
|
+
pthread_mutex_destroy(&proxy->lock);
|
|
539
|
+
#endif
|
|
540
|
+
|
|
541
|
+
free(proxy);
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
void rbduckdb_function_executor_dispatch_via_proxy(rbduckdb_function_callback_t cb, void *user_data, struct worker_proxy *proxy) {
|
|
246
545
|
if (ruby_native_thread_p()) {
|
|
247
546
|
if (ruby_thread_has_gvl_p()) {
|
|
248
547
|
/* Case 1: Ruby thread with GVL - call directly */
|
|
@@ -254,8 +553,15 @@ void rbduckdb_function_executor_dispatch(rbduckdb_function_callback_t cb, void *
|
|
|
254
553
|
arg.user_data = user_data;
|
|
255
554
|
rb_thread_call_with_gvl(callback_with_gvl, &arg);
|
|
256
555
|
}
|
|
556
|
+
} else if (proxy != NULL) {
|
|
557
|
+
/* Case 3a: Non-Ruby thread with a per-worker proxy */
|
|
558
|
+
dispatch_callback_to_proxy(proxy, cb, user_data);
|
|
257
559
|
} else {
|
|
258
|
-
/* Case
|
|
560
|
+
/* Case 3b: Non-Ruby thread - dispatch to the global executor */
|
|
259
561
|
dispatch_callback_to_executor(cb, user_data);
|
|
260
562
|
}
|
|
261
563
|
}
|
|
564
|
+
|
|
565
|
+
/*
 * Dispatch a callback without a per-worker proxy: forwards to
 * rbduckdb_function_executor_dispatch_via_proxy with a NULL proxy.
 */
void rbduckdb_function_executor_dispatch(rbduckdb_function_callback_t cb, void *user_data) {
    struct worker_proxy *no_proxy = NULL;

    rbduckdb_function_executor_dispatch_via_proxy(cb, user_data, no_proxy);
}
|
|
@@ -43,4 +43,48 @@ void rbduckdb_function_executor_ensure_started(void);
|
|
|
43
43
|
*/
|
|
44
44
|
void rbduckdb_function_executor_dispatch(rbduckdb_function_callback_t cb, void *user_data);
|
|
45
45
|
|
|
46
|
+
/*
|
|
47
|
+
* ============================================================================
|
|
48
|
+
* Per-worker proxy threads (DuckDB >= 1.5.0)
|
|
49
|
+
* ============================================================================
|
|
50
|
+
*
|
|
51
|
+
* The global executor above serializes every non-Ruby-thread callback through
|
|
52
|
+
* a single Ruby thread. A per-worker proxy instead gives each DuckDB worker
|
|
53
|
+
* thread its own dedicated Ruby thread, so callbacks from different workers can
|
|
54
|
+
* run concurrently — they compete for the GVL in round-robin fashion, which
|
|
55
|
+
* helps when callbacks release the GVL (e.g. on I/O).
|
|
56
|
+
*
|
|
57
|
+
* Proxies are created lazily from DuckDB's per-worker init hook and stored in
|
|
58
|
+
* DuckDB's thread-local state; the global executor remains the fallback.
|
|
59
|
+
*/
|
|
60
|
+
|
|
61
|
+
/* Opaque per-worker proxy handle. */
|
|
62
|
+
struct worker_proxy;
|
|
63
|
+
|
|
64
|
+
/*
|
|
65
|
+
* Create a per-worker proxy thread. Must be called with the GVL held
|
|
66
|
+
* (typically by dispatching this through the global executor from a per-worker
|
|
67
|
+
* init callback, which itself runs on a non-Ruby thread).
|
|
68
|
+
*
|
|
69
|
+
* May raise (NoMemError, Thread.new failure). The executor runs callbacks
|
|
70
|
+
* unprotected, so a wrapper dispatched to it must rb_protect this call —
|
|
71
|
+
* otherwise a raise longjmps past the executor's done-signaling and the
|
|
72
|
+
* waiting DuckDB worker blocks forever.
|
|
73
|
+
*/
|
|
74
|
+
struct worker_proxy *rbduckdb_worker_proxy_create(void);
|
|
75
|
+
|
|
76
|
+
/*
|
|
77
|
+
* Destroy a per-worker proxy. The signature matches duckdb_delete_callback_t so
|
|
78
|
+
* it can be handed directly to DuckDB. Safe to call from a non-Ruby thread: it
|
|
79
|
+
* touches only OS primitives and frees memory allocated with calloc.
|
|
80
|
+
*/
|
|
81
|
+
void rbduckdb_worker_proxy_destroy(void *proxy);
|
|
82
|
+
|
|
83
|
+
/*
|
|
84
|
+
* Like rbduckdb_function_executor_dispatch, but on the non-Ruby-thread path
|
|
85
|
+
* (Case 3) it routes through the given per-worker proxy when non-NULL, falling
|
|
86
|
+
* back to the global executor when NULL. Cases 1 and 2 are unchanged.
|
|
87
|
+
*/
|
|
88
|
+
void rbduckdb_function_executor_dispatch_via_proxy(rbduckdb_function_callback_t cb, void *user_data, struct worker_proxy *proxy);
|
|
89
|
+
|
|
46
90
|
#endif
|
|
@@ -34,9 +34,11 @@ static VALUE prepared_statement__bind_uint64(VALUE self, VALUE vidx, VALUE val);
|
|
|
34
34
|
static VALUE prepared_statement__bind_date(VALUE self, VALUE vidx, VALUE year, VALUE month, VALUE day);
|
|
35
35
|
static VALUE prepared_statement__bind_time(VALUE self, VALUE vidx, VALUE hour, VALUE min, VALUE sec, VALUE micros);
|
|
36
36
|
static VALUE prepared_statement__bind_timestamp(VALUE self, VALUE vidx, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
|
|
37
|
+
static VALUE prepared_statement__bind_timestamp_tz(VALUE self, VALUE vidx, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
|
|
37
38
|
static VALUE prepared_statement__bind_interval(VALUE self, VALUE vidx, VALUE months, VALUE days, VALUE micros);
|
|
38
39
|
static VALUE prepared_statement__bind_hugeint(VALUE self, VALUE vidx, VALUE lower, VALUE upper);
|
|
39
40
|
static VALUE prepared_statement__bind_uhugeint(VALUE self, VALUE vidx, VALUE lower, VALUE upper);
|
|
41
|
+
static VALUE prepared_statement__bind_uuid(VALUE self, VALUE vidx, VALUE val);
|
|
40
42
|
static VALUE prepared_statement__bind_decimal(VALUE self, VALUE vidx, VALUE lower, VALUE upper, VALUE width, VALUE scale);
|
|
41
43
|
static VALUE prepared_statement__bind_value(VALUE self, VALUE vidx, VALUE val);
|
|
42
44
|
|
|
@@ -471,6 +473,21 @@ static VALUE prepared_statement__bind_timestamp(VALUE self, VALUE vidx, VALUE ye
|
|
|
471
473
|
return self;
|
|
472
474
|
}
|
|
473
475
|
|
|
476
|
+
/* :nodoc: */
|
|
477
|
+
static VALUE prepared_statement__bind_timestamp_tz(VALUE self, VALUE vidx, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros) {
|
|
478
|
+
duckdb_timestamp timestamp_tz;
|
|
479
|
+
rubyDuckDBPreparedStatement *ctx;
|
|
480
|
+
idx_t idx = check_index(vidx);
|
|
481
|
+
|
|
482
|
+
timestamp_tz = rbduckdb_to_duckdb_timestamp_from_value(year, month, day, hour, min, sec, micros);
|
|
483
|
+
TypedData_Get_Struct(self, rubyDuckDBPreparedStatement, &prepared_statement_data_type, ctx);
|
|
484
|
+
|
|
485
|
+
if (duckdb_bind_timestamp_tz(ctx->prepared_statement, idx, timestamp_tz) == DuckDBError) {
|
|
486
|
+
rb_raise(eDuckDBError, "fail to bind %llu parameter", (unsigned long long)idx);
|
|
487
|
+
}
|
|
488
|
+
return self;
|
|
489
|
+
}
|
|
490
|
+
|
|
474
491
|
/* :nodoc: */
|
|
475
492
|
static VALUE prepared_statement__bind_interval(VALUE self, VALUE vidx, VALUE months, VALUE days, VALUE micros) {
|
|
476
493
|
duckdb_interval interval;
|
|
@@ -542,6 +559,25 @@ static VALUE prepared_statement__bind_decimal(VALUE self, VALUE vidx, VALUE lowe
|
|
|
542
559
|
return self;
|
|
543
560
|
}
|
|
544
561
|
|
|
562
|
+
/* :nodoc: */
|
|
563
|
+
static VALUE prepared_statement__bind_uuid(VALUE self, VALUE vidx, VALUE val) {
|
|
564
|
+
rubyDuckDBPreparedStatement *ctx;
|
|
565
|
+
duckdb_uhugeint uhugeint;
|
|
566
|
+
duckdb_value uuid_val;
|
|
567
|
+
duckdb_state state;
|
|
568
|
+
idx_t idx = check_index(vidx);
|
|
569
|
+
|
|
570
|
+
TypedData_Get_Struct(self, rubyDuckDBPreparedStatement, &prepared_statement_data_type, ctx);
|
|
571
|
+
rbduckdb_uuid_str_to_uhugeint(val, &uhugeint);
|
|
572
|
+
uuid_val = duckdb_create_uuid(uhugeint);
|
|
573
|
+
state = duckdb_bind_value(ctx->prepared_statement, idx, uuid_val);
|
|
574
|
+
duckdb_destroy_value(&uuid_val);
|
|
575
|
+
if (state == DuckDBError) {
|
|
576
|
+
rb_raise(eDuckDBError, "fail to bind %llu parameter", (unsigned long long)idx);
|
|
577
|
+
}
|
|
578
|
+
return self;
|
|
579
|
+
}
|
|
580
|
+
|
|
545
581
|
/* :nodoc: */
|
|
546
582
|
static VALUE prepared_statement__bind_value(VALUE self, VALUE vidx, VALUE val) {
|
|
547
583
|
rubyDuckDBPreparedStatement *ctx;
|
|
@@ -597,9 +633,11 @@ void rbduckdb_init_prepared_statement(void) {
|
|
|
597
633
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_date", prepared_statement__bind_date, 4);
|
|
598
634
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_time", prepared_statement__bind_time, 5);
|
|
599
635
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_timestamp", prepared_statement__bind_timestamp, 8);
|
|
636
|
+
rb_define_private_method(cDuckDBPreparedStatement, "_bind_timestamp_tz", prepared_statement__bind_timestamp_tz, 8);
|
|
600
637
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_interval", prepared_statement__bind_interval, 4);
|
|
601
638
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_hugeint", prepared_statement__bind_hugeint, 3);
|
|
602
639
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_uhugeint", prepared_statement__bind_uhugeint, 3);
|
|
640
|
+
rb_define_private_method(cDuckDBPreparedStatement, "_bind_uuid", prepared_statement__bind_uuid, 2);
|
|
603
641
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_decimal", prepared_statement__bind_decimal, 5);
|
|
604
642
|
rb_define_private_method(cDuckDBPreparedStatement, "_bind_value", prepared_statement__bind_value, 2);
|
|
605
643
|
}
|
data/ext/duckdb/result.c
CHANGED
|
@@ -16,13 +16,11 @@ static VALUE result_columns(VALUE oDuckDBResult);
|
|
|
16
16
|
static VALUE destroy_data_chunk(VALUE arg);
|
|
17
17
|
|
|
18
18
|
static VALUE result__chunk_stream(VALUE oDuckDBResult);
|
|
19
|
+
static VALUE result_arrow_c_stream(VALUE oDuckDBResult);
|
|
19
20
|
static VALUE yield_rows(VALUE arg);
|
|
20
|
-
static VALUE result__column_type(VALUE oDuckDBResult, VALUE col_idx);
|
|
21
21
|
static VALUE result__return_type(VALUE oDuckDBResult);
|
|
22
22
|
static VALUE result__statement_type(VALUE oDuckDBResult);
|
|
23
23
|
static VALUE result__enum_internal_type(VALUE oDuckDBResult, VALUE col_idx);
|
|
24
|
-
static VALUE result__enum_dictionary_size(VALUE oDuckDBResult, VALUE col_idx);
|
|
25
|
-
static VALUE result__enum_dictionary_value(VALUE oDuckDBResult, VALUE col_idx, VALUE idx);
|
|
26
24
|
|
|
27
25
|
static VALUE vector_date(void *vector_data, idx_t row_idx);
|
|
28
26
|
static VALUE vector_timestamp(void* vector_data, idx_t row_idx);
|
|
@@ -59,15 +57,30 @@ static const rb_data_type_t result_data_type = {
|
|
|
59
57
|
static void deallocate(void *ctx) {
|
|
60
58
|
rubyDuckDBResult *p = (rubyDuckDBResult *)ctx;
|
|
61
59
|
|
|
62
|
-
|
|
63
|
-
xfree(p);
|
|
60
|
+
rbduckdb_result_unref(p);
|
|
64
61
|
}
|
|
65
62
|
|
|
66
63
|
/*
 * Allocator for DuckDB::Result. The context is zero-initialised with plain
 * calloc and starts with a reference count of 1 before being wrapped.
 * Raises NoMemError when the allocation fails.
 */
static VALUE allocate(VALUE klass) {
    rubyDuckDBResult *result_ctx = calloc(1, sizeof(*result_ctx));

    if (!result_ctx) {
        rb_raise(rb_eNoMemError, "failed to allocate DuckDB::Result");
    }

    result_ctx->refcount = 1;

    return TypedData_Wrap_Struct(klass, &result_data_type, result_ctx);
}
|
|
70
72
|
|
|
73
|
+
/* Atomically adds one reference to ctx. */
void rbduckdb_result_ref(rubyDuckDBResult *ctx) {
    (void)RUBY_ATOMIC_FETCH_ADD(ctx->refcount, 1);
}
|
|
76
|
+
|
|
77
|
+
/*
 * Atomically drops one reference to ctx. When the count was 1 before the
 * decrement (i.e. this was the last reference), the duckdb_result is
 * destroyed and ctx itself is freed.
 */
void rbduckdb_result_unref(rubyDuckDBResult *ctx) {
    rb_atomic_t count_before = RUBY_ATOMIC_FETCH_SUB(ctx->refcount, 1);

    if (count_before != 1) {
        return;
    }

    duckdb_destroy_result(&ctx->result);
    free(ctx);
}
|
|
83
|
+
|
|
71
84
|
static size_t memsize(const void *p) {
|
|
72
85
|
return sizeof(rubyDuckDBResult);
|
|
73
86
|
}
|
|
@@ -198,13 +211,6 @@ static VALUE yield_rows(VALUE arg) {
|
|
|
198
211
|
return Qnil;
|
|
199
212
|
}
|
|
200
213
|
|
|
201
|
-
/* :nodoc: */
|
|
202
|
-
static VALUE result__column_type(VALUE oDuckDBResult, VALUE col_idx) {
|
|
203
|
-
rubyDuckDBResult *ctx;
|
|
204
|
-
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
|
205
|
-
return LL2NUM(duckdb_column_type(&(ctx->result), NUM2LL(col_idx)));
|
|
206
|
-
}
|
|
207
|
-
|
|
208
214
|
/* :nodoc: */
|
|
209
215
|
static VALUE result__return_type(VALUE oDuckDBResult) {
|
|
210
216
|
rubyDuckDBResult *ctx;
|
|
@@ -234,46 +240,10 @@ static VALUE result__enum_internal_type(VALUE oDuckDBResult, VALUE col_idx) {
|
|
|
234
240
|
return type;
|
|
235
241
|
}
|
|
236
242
|
|
|
237
|
-
/* :nodoc: */
|
|
238
|
-
static VALUE result__enum_dictionary_size(VALUE oDuckDBResult, VALUE col_idx) {
|
|
239
|
-
rubyDuckDBResult *ctx;
|
|
240
|
-
VALUE size = Qnil;
|
|
241
|
-
duckdb_logical_type logical_type;
|
|
242
|
-
|
|
243
|
-
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
|
244
|
-
logical_type = duckdb_column_logical_type(&(ctx->result), NUM2LL(col_idx));
|
|
245
|
-
if (logical_type) {
|
|
246
|
-
size = UINT2NUM(duckdb_enum_dictionary_size(logical_type));
|
|
247
|
-
}
|
|
248
|
-
duckdb_destroy_logical_type(&logical_type);
|
|
249
|
-
return size;
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
/* :nodoc: */
|
|
253
|
-
static VALUE result__enum_dictionary_value(VALUE oDuckDBResult, VALUE col_idx, VALUE idx) {
|
|
254
|
-
rubyDuckDBResult *ctx;
|
|
255
|
-
VALUE value = Qnil;
|
|
256
|
-
duckdb_logical_type logical_type;
|
|
257
|
-
char *p;
|
|
258
|
-
|
|
259
|
-
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
|
260
|
-
logical_type = duckdb_column_logical_type(&(ctx->result), NUM2LL(col_idx));
|
|
261
|
-
if (logical_type) {
|
|
262
|
-
p = duckdb_enum_dictionary_value(logical_type, NUM2LL(idx));
|
|
263
|
-
if (p) {
|
|
264
|
-
value = rb_utf8_str_new_cstr(p);
|
|
265
|
-
duckdb_free(p);
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
duckdb_destroy_logical_type(&logical_type);
|
|
269
|
-
return value;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
243
|
VALUE rbduckdb_create_result(void) {
|
|
273
244
|
return allocate(cDuckDBResult);
|
|
274
245
|
}
|
|
275
246
|
|
|
276
|
-
|
|
277
247
|
/* Reads vector_data as a duckdb_date array and converts element row_idx to Ruby. */
static VALUE vector_date(void *vector_data, idx_t row_idx) {
    duckdb_date *dates = (duckdb_date *)vector_data;

    return rbduckdb_date_to_ruby(dates[row_idx]);
}
|
|
@@ -286,7 +256,6 @@ static VALUE vector_time(void* vector_data, idx_t row_idx) {
|
|
|
286
256
|
return rbduckdb_time_to_ruby(((duckdb_time *)vector_data)[row_idx]);
|
|
287
257
|
}
|
|
288
258
|
|
|
289
|
-
|
|
290
259
|
/* Reads vector_data as a duckdb_interval array and converts element row_idx to Ruby. */
static VALUE vector_interval(void* vector_data, idx_t row_idx) {
    duckdb_interval *intervals = (duckdb_interval *)vector_data;

    return rbduckdb_interval_to_ruby(intervals[row_idx]);
}
|
|
@@ -694,6 +663,35 @@ static VALUE vector_uuid(void* vector_data, idx_t row_idx) {
|
|
|
694
663
|
return rbduckdb_uuid_to_ruby(((duckdb_hugeint *)vector_data)[row_idx]);
|
|
695
664
|
}
|
|
696
665
|
|
|
666
|
+
/*
|
|
667
|
+
* call-seq:
|
|
668
|
+
* result.arrow_c_stream -> DuckDB::ArrowArrayStream
|
|
669
|
+
*
|
|
670
|
+
* [EXPERIMENTAL] Exports the result as an Arrow C stream
|
|
671
|
+
* (Arrow C Data Interface). The returned stream object satisfies the
|
|
672
|
+
* Ruby Arrow C stream protocol, so it can be consumed directly by
|
|
673
|
+
* ruby-polars, red-arrow and other Arrow consumers:
|
|
674
|
+
*
|
|
675
|
+
* result = con.query('SELECT * FROM users')
|
|
676
|
+
* df = Polars::DataFrame.new(result)
|
|
677
|
+
*
|
|
678
|
+
* The stream consumes the result's chunks; a result can be exported
|
|
679
|
+
* only once. This API is built on DuckDB's unstable Arrow C API and
|
|
680
|
+
* may change in any minor release.
|
|
681
|
+
*/
|
|
682
|
+
static VALUE result_arrow_c_stream(VALUE oDuckDBResult) {
|
|
683
|
+
rubyDuckDBResult *ctx;
|
|
684
|
+
VALUE stream;
|
|
685
|
+
|
|
686
|
+
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
|
687
|
+
if (ctx->arrow_exported) {
|
|
688
|
+
rb_raise(eDuckDBError, "result is already exported as an Arrow stream");
|
|
689
|
+
}
|
|
690
|
+
stream = rbduckdb_create_arrow_array_stream(oDuckDBResult);
|
|
691
|
+
ctx->arrow_exported = true;
|
|
692
|
+
return stream;
|
|
693
|
+
}
|
|
694
|
+
|
|
697
695
|
static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
|
|
698
696
|
duckdb_logical_type ty;
|
|
699
697
|
VALUE obj = Qnil;
|
|
@@ -718,11 +716,9 @@ void rbduckdb_init_result(void) {
|
|
|
718
716
|
rb_define_method(cDuckDBResult, "rows_changed", result_rows_changed, 0);
|
|
719
717
|
rb_define_method(cDuckDBResult, "columns", result_columns, 0);
|
|
720
718
|
rb_define_private_method(cDuckDBResult, "_chunk_stream", result__chunk_stream, 0);
|
|
721
|
-
|
|
719
|
+
rb_define_method(cDuckDBResult, "arrow_c_stream", result_arrow_c_stream, 0);
|
|
722
720
|
rb_define_private_method(cDuckDBResult, "_return_type", result__return_type, 0);
|
|
723
721
|
rb_define_private_method(cDuckDBResult, "_statement_type", result__statement_type, 0);
|
|
724
722
|
|
|
725
723
|
rb_define_private_method(cDuckDBResult, "_enum_internal_type", result__enum_internal_type, 1);
|
|
726
|
-
rb_define_private_method(cDuckDBResult, "_enum_dictionary_size", result__enum_dictionary_size, 1);
|
|
727
|
-
rb_define_private_method(cDuckDBResult, "_enum_dictionary_value", result__enum_dictionary_value, 2);
|
|
728
724
|
}
|
data/ext/duckdb/result.h
CHANGED
|
@@ -1,13 +1,24 @@
|
|
|
1
1
|
#ifndef RUBY_DUCKDB_RESULT_H
|
|
2
2
|
#define RUBY_DUCKDB_RESULT_H
|
|
3
3
|
|
|
4
|
+
/*
|
|
5
|
+
* Allocated with plain calloc/free and reference-counted: the Ruby Result
|
|
6
|
+
* object holds one reference, and each exported Arrow stream holds another,
|
|
7
|
+
* so the duckdb_result stays valid for consumers that outlive the Ruby
|
|
8
|
+
* objects. rbduckdb_result_unref() must not call any Ruby API: it runs from
|
|
9
|
+
* GC sweep (deallocate) and from Arrow stream release callbacks.
|
|
10
|
+
*/
|
|
4
11
|
struct _rubyDuckDBResult {
|
|
5
12
|
duckdb_result result;
|
|
13
|
+
bool arrow_exported;
|
|
14
|
+
rb_atomic_t refcount;
|
|
6
15
|
};
|
|
7
16
|
|
|
8
17
|
typedef struct _rubyDuckDBResult rubyDuckDBResult;
|
|
9
18
|
|
|
10
19
|
rubyDuckDBResult *rbduckdb_get_struct_result(VALUE obj);
|
|
20
|
+
void rbduckdb_result_ref(rubyDuckDBResult *ctx);
|
|
21
|
+
void rbduckdb_result_unref(rubyDuckDBResult *ctx);
|
|
11
22
|
void rbduckdb_init_result(void);
|
|
12
23
|
VALUE rbduckdb_create_result(void);
|
|
13
24
|
VALUE rbduckdb_vector_value_at(duckdb_vector vector, duckdb_logical_type element_type, idx_t index);
|
data/ext/duckdb/ruby-duckdb.h
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
#include "ruby.h"
|
|
8
8
|
#include "ruby/thread.h"
|
|
9
|
+
#include "ruby/atomic.h"
|
|
9
10
|
#include <duckdb.h>
|
|
10
11
|
|
|
11
12
|
#ifdef HAVE_DUCKDB_UNSAFE_VECTOR_ASSIGN_STRING_ELEMENT_LEN
|
|
@@ -38,6 +39,7 @@
|
|
|
38
39
|
#include "./scalar_function.h"
|
|
39
40
|
#include "./scalar_function_set.h"
|
|
40
41
|
#include "./aggregate_function.h"
|
|
42
|
+
#include "./aggregate_function_set.h"
|
|
41
43
|
#include "./expression.h"
|
|
42
44
|
#include "./client_context.h"
|
|
43
45
|
#include "./scalar_function_bind_info.h"
|
|
@@ -49,6 +51,8 @@
|
|
|
49
51
|
#include "./memory_helper.h"
|
|
50
52
|
#include "./table_function.h"
|
|
51
53
|
#include "./table_description.h"
|
|
54
|
+
#include "./arrow_array_stream.h"
|
|
55
|
+
#include "./arrow_import.h"
|
|
52
56
|
|
|
53
57
|
extern VALUE mDuckDB;
|
|
54
58
|
extern VALUE cDuckDBDatabase;
|