duckdb 1.5.1.0 → 1.5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +50 -0
  3. data/README.md +18 -0
  4. data/ext/duckdb/aggregate_function.c +791 -0
  5. data/ext/duckdb/aggregate_function.h +18 -0
  6. data/ext/duckdb/appender.c +27 -0
  7. data/ext/duckdb/connection.c +69 -0
  8. data/ext/duckdb/converter.h +9 -1
  9. data/ext/duckdb/conveter.c +173 -2
  10. data/ext/duckdb/data_chunk.c +69 -0
  11. data/ext/duckdb/data_chunk.h +1 -0
  12. data/ext/duckdb/database.c +5 -49
  13. data/ext/duckdb/duckdb.c +6 -1
  14. data/ext/duckdb/extconf.rb +0 -2
  15. data/ext/duckdb/extracted_statements.c +4 -4
  16. data/ext/duckdb/function_executor.c +261 -0
  17. data/ext/duckdb/function_executor.h +46 -0
  18. data/ext/duckdb/function_vector.c +207 -0
  19. data/ext/duckdb/function_vector.h +27 -0
  20. data/ext/duckdb/logical_type.c +147 -0
  21. data/ext/duckdb/memory_helper.c +2 -10
  22. data/ext/duckdb/prepared_statement.c +17 -0
  23. data/ext/duckdb/result.c +12 -21
  24. data/ext/duckdb/ruby-duckdb.h +9 -3
  25. data/ext/duckdb/scalar_function.c +53 -404
  26. data/ext/duckdb/scalar_function_set.c +86 -0
  27. data/ext/duckdb/scalar_function_set.h +14 -0
  28. data/ext/duckdb/table_description.c +144 -0
  29. data/ext/duckdb/table_description.h +16 -0
  30. data/ext/duckdb/table_function.c +78 -41
  31. data/ext/duckdb/util.c +16 -0
  32. data/ext/duckdb/util.h +1 -0
  33. data/ext/duckdb/value.c +279 -0
  34. data/ext/duckdb/value.h +15 -0
  35. data/lib/duckdb/aggregate_function.rb +47 -0
  36. data/lib/duckdb/appender.rb +32 -7
  37. data/lib/duckdb/column_description.rb +32 -0
  38. data/lib/duckdb/connection.rb +77 -0
  39. data/lib/duckdb/converter.rb +36 -16
  40. data/lib/duckdb/data_chunk.rb +46 -31
  41. data/lib/duckdb/database.rb +44 -15
  42. data/lib/duckdb/duckdb_native.so +0 -0
  43. data/lib/duckdb/extracted_statements.rb +2 -2
  44. data/lib/duckdb/function_type_validation.rb +50 -0
  45. data/lib/duckdb/interval.rb +15 -9
  46. data/lib/duckdb/logical_type.rb +66 -0
  47. data/lib/duckdb/prepared_statement.rb +18 -4
  48. data/lib/duckdb/scalar_function.rb +17 -51
  49. data/lib/duckdb/scalar_function_set.rb +31 -0
  50. data/lib/duckdb/table_description.rb +79 -0
  51. data/lib/duckdb/value.rb +248 -0
  52. data/lib/duckdb/version.rb +1 -1
  53. data/lib/duckdb.rb +6 -0
  54. metadata +25 -32
  55. data/.gitattributes +0 -1
  56. data/.github/FUNDING.yml +0 -3
  57. data/.github/copilot-instructions.md +0 -169
  58. data/.github/workflows/linter.yml +0 -30
  59. data/.github/workflows/make_documents.yml +0 -34
  60. data/.github/workflows/test_on_macos.yml +0 -103
  61. data/.github/workflows/test_on_ubuntu.yml +0 -108
  62. data/.github/workflows/test_on_windows.yml +0 -71
  63. data/.gitignore +0 -19
  64. data/.rubocop.yml +0 -34
  65. data/CONTRIBUTION.md +0 -45
  66. data/Dockerfile +0 -22
  67. data/Gemfile +0 -16
  68. data/Gemfile.lock +0 -77
  69. data/benchmark/converter_hugeint_ips.rb +0 -27
  70. data/benchmark/get_converter_module_ips.rb +0 -26
  71. data/benchmark/to_intern_ips.rb +0 -72
  72. data/docker-compose.yml +0 -11
  73. data/duckdb.gemspec +0 -33
  74. data/ext/duckdb/value_impl.c +0 -126
  75. data/ext/duckdb/value_impl.h +0 -15
  76. data/getduckdb.sh +0 -18
  77. data/sample/async_query.rb +0 -25
  78. data/sample/issue922.rb +0 -54
  79. data/sample/issue922_benchmark.rb +0 -169
  80. data/sample/issue930.rb +0 -49
  81. data/sample/issue930_benchmark.rb +0 -70
@@ -0,0 +1,791 @@
1
+ #include "ruby-duckdb.h"
2
+
3
+ VALUE cDuckDBAggregateFunction;
4
+
5
+ /*
6
+ * Global Ruby Hash used to keep aggregate state Ruby VALUEs alive during
7
+ * aggregation. Keys are monotonic state IDs (see state_registry_key) that
8
+ * survive DuckDB's internal memcpy of state buffers. Values are the Ruby
9
+ * VALUE returned from the user's init_proc and later passed to
10
+ * finalize_proc.
11
+ *
12
+ * Protected from GC via rb_gc_register_mark_object on init.
13
+ */
14
+ static VALUE g_aggregate_state_registry;
15
+
16
+ /*
17
+ * Monotonic counter for aggregate state IDs. Each state_init_callback
18
+ * assigns the next ID; because DuckDB memcpy's state buffers internally
19
+ * (e.g. from a temporary allocation into the hash-table row layout), the
20
+ * embedded ID is the only reliable way to match a state across init /
21
+ * combine / finalize / destroy calls.
22
+ */
23
+ static unsigned long long g_next_state_id = 0;
24
+
25
+ typedef struct {
26
+ unsigned long long state_id;
27
+ VALUE ruby_state;
28
+ } ruby_aggregate_state;
29
+
30
+ static void mark(void *);
31
+ static void deallocate(void *);
32
+ static VALUE allocate(VALUE klass);
33
+ static size_t memsize(const void *p);
34
+ static void compact(void *);
35
+ static VALUE duckdb_aggregate_function_initialize(VALUE self);
36
+ static VALUE rbduckdb_aggregate_function_set_name(VALUE self, VALUE name);
37
+ static VALUE rbduckdb_aggregate_function__set_return_type(VALUE self, VALUE logical_type);
38
+ static VALUE rbduckdb_aggregate_function_add_parameter(VALUE self, VALUE logical_type);
39
+ static VALUE rbduckdb_aggregate_function_set_init(VALUE self);
40
+ static VALUE rbduckdb_aggregate_function_set_update(VALUE self);
41
+ static VALUE rbduckdb_aggregate_function_set_combine(VALUE self);
42
+ static VALUE rbduckdb_aggregate_function_set_finalize(VALUE self);
43
+ static VALUE rbduckdb_aggregate_function__set_special_handling(VALUE self);
44
+
45
+ static const rb_data_type_t aggregate_function_data_type = {
46
+ "DuckDB/AggregateFunction",
47
+ {mark, deallocate, memsize, compact},
48
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
49
+ };
50
+
51
+ static void mark(void *ctx) {
52
+ rubyDuckDBAggregateFunction *p = (rubyDuckDBAggregateFunction *)ctx;
53
+ rb_gc_mark_movable(p->init_proc);
54
+ rb_gc_mark_movable(p->update_proc);
55
+ rb_gc_mark_movable(p->combine_proc);
56
+ rb_gc_mark_movable(p->finalize_proc);
57
+ }
58
+
59
+ static void deallocate(void *ctx) {
60
+ rubyDuckDBAggregateFunction *p = (rubyDuckDBAggregateFunction *)ctx;
61
+ duckdb_destroy_aggregate_function(&(p->aggregate_function));
62
+ xfree(p);
63
+ }
64
+
65
+ static void compact(void *ctx) {
66
+ rubyDuckDBAggregateFunction *p = (rubyDuckDBAggregateFunction *)ctx;
67
+ if (p->init_proc != Qnil) {
68
+ p->init_proc = rb_gc_location(p->init_proc);
69
+ }
70
+ if (p->update_proc != Qnil) {
71
+ p->update_proc = rb_gc_location(p->update_proc);
72
+ }
73
+ if (p->combine_proc != Qnil) {
74
+ p->combine_proc = rb_gc_location(p->combine_proc);
75
+ }
76
+ if (p->finalize_proc != Qnil) {
77
+ p->finalize_proc = rb_gc_location(p->finalize_proc);
78
+ }
79
+ }
80
+
81
+ static VALUE allocate(VALUE klass) {
82
+ rubyDuckDBAggregateFunction *ctx = xcalloc((size_t)1, sizeof(rubyDuckDBAggregateFunction));
83
+ return TypedData_Wrap_Struct(klass, &aggregate_function_data_type, ctx);
84
+ }
85
+
86
+ static size_t memsize(const void *p) {
87
+ return sizeof(rubyDuckDBAggregateFunction);
88
+ }
89
+
90
+ rubyDuckDBAggregateFunction *get_struct_aggregate_function(VALUE obj) {
91
+ rubyDuckDBAggregateFunction *ctx;
92
+ TypedData_Get_Struct(obj, rubyDuckDBAggregateFunction, &aggregate_function_data_type, ctx);
93
+ return ctx;
94
+ }
95
+
96
+ static VALUE duckdb_aggregate_function_initialize(VALUE self) {
97
+ rubyDuckDBAggregateFunction *p;
98
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
99
+ p->aggregate_function = duckdb_create_aggregate_function();
100
+ p->init_proc = Qnil;
101
+ p->update_proc = Qnil;
102
+ p->combine_proc = Qnil;
103
+ p->finalize_proc = Qnil;
104
+ p->special_handling = false;
105
+ return self;
106
+ }
107
+
108
+ static VALUE rbduckdb_aggregate_function_set_name(VALUE self, VALUE name) {
109
+ rubyDuckDBAggregateFunction *p;
110
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
111
+
112
+ const char *str = StringValuePtr(name);
113
+ duckdb_aggregate_function_set_name(p->aggregate_function, str);
114
+
115
+ return self;
116
+ }
117
+
118
+ static VALUE rbduckdb_aggregate_function__set_return_type(VALUE self, VALUE logical_type) {
119
+ rubyDuckDBAggregateFunction *p;
120
+ rubyDuckDBLogicalType *lt;
121
+
122
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
123
+ lt = get_struct_logical_type(logical_type);
124
+
125
+ duckdb_aggregate_function_set_return_type(p->aggregate_function, lt->logical_type);
126
+
127
+ return self;
128
+ }
129
+
130
+ static VALUE rbduckdb_aggregate_function_add_parameter(VALUE self, VALUE logical_type) {
131
+ rubyDuckDBAggregateFunction *p;
132
+ rubyDuckDBLogicalType *lt;
133
+
134
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
135
+ lt = get_struct_logical_type(logical_type);
136
+
137
+ duckdb_aggregate_function_add_parameter(p->aggregate_function, lt->logical_type);
138
+
139
+ return self;
140
+ }
141
+
142
+ /*
143
+ * Build a Ruby Hash key from the state's embedded ID.
144
+ * Used for the g_aggregate_state_registry GC root.
145
+ */
146
+ static inline VALUE state_registry_key(ruby_aggregate_state *state) {
147
+ return ULL2NUM(state->state_id);
148
+ }
149
+
150
+ /*
151
+ * Store (or update) a Ruby VALUE in the global state registry so that
152
+ * it stays reachable by the GC for the lifetime of the aggregate state.
153
+ */
154
+ static inline void state_registry_store(ruby_aggregate_state *state, VALUE value) {
155
+ rb_hash_aset(g_aggregate_state_registry, state_registry_key(state), value);
156
+ }
157
+
158
+ /*
159
+ * Remove a state entry from the registry. Safe to call even if the
160
+ * entry was already removed (rb_hash_delete is a no-op for missing keys).
161
+ */
162
+ static inline void state_registry_remove(ruby_aggregate_state *state) {
163
+ rb_hash_delete(g_aggregate_state_registry, state_registry_key(state));
164
+ }
165
+
166
+ /*
167
+ * Report a pending Ruby exception to DuckDB via
168
+ * duckdb_aggregate_function_set_error and clear it from errinfo.
169
+ * Caller must only invoke this when rb_protect reported exception_state != 0.
170
+ */
171
+ static void report_ruby_error_to_duckdb(duckdb_function_info info) {
172
+ VALUE errinfo = rb_errinfo();
173
+ if (errinfo != Qnil) {
174
+ VALUE msg = rb_funcall(errinfo, rb_intern("message"), 0);
175
+ duckdb_aggregate_function_set_error(info, StringValueCStr(msg));
176
+ }
177
+ rb_set_errinfo(Qnil);
178
+ }
179
+
180
+ /* state_size callback: constant buffer per state. */
181
+ static idx_t state_size_callback(duckdb_function_info info) {
182
+ (void)info;
183
+ return sizeof(ruby_aggregate_state);
184
+ }
185
+
186
+ /* init callback dispatch argument */
187
+ struct init_callback_arg {
188
+ rubyDuckDBAggregateFunction *ctx;
189
+ duckdb_function_info info;
190
+ duckdb_aggregate_state state_p;
191
+ };
192
+
193
+ static VALUE call_init_proc(VALUE varg) {
194
+ struct init_callback_arg *arg = (struct init_callback_arg *)varg;
195
+ return rb_funcall(arg->ctx->init_proc, rb_intern("call"), 0);
196
+ }
197
+
198
+ static void execute_init_callback_protected(void *user_data) {
199
+ struct init_callback_arg *arg = (struct init_callback_arg *)user_data;
200
+ ruby_aggregate_state *state = (ruby_aggregate_state *)arg->state_p;
201
+ int exception_state;
202
+ VALUE result;
203
+
204
+ /* Initialise buffer to a safe value before calling Ruby. */
205
+ state->ruby_state = Qnil;
206
+ state->state_id = ++g_next_state_id;
207
+
208
+ result = rb_protect(call_init_proc, (VALUE)arg, &exception_state);
209
+ if (exception_state) {
210
+ report_ruby_error_to_duckdb(arg->info);
211
+ return;
212
+ }
213
+
214
+ state->ruby_state = result;
215
+ state_registry_store(state, result);
216
+ }
217
+
218
+ static void state_init_callback(duckdb_function_info info, duckdb_aggregate_state state_p) {
219
+ rubyDuckDBAggregateFunction *ctx;
220
+ struct init_callback_arg arg;
221
+
222
+ ctx = (rubyDuckDBAggregateFunction *)duckdb_aggregate_function_get_extra_info(info);
223
+ if (ctx == NULL || ctx->init_proc == Qnil) {
224
+ /* Defensive: maybe_set_functions only wires callbacks when init_proc
225
+ * is set, so this branch should be unreachable in practice. Zero the
226
+ * buffer anyway to keep the Ruby state slot well-defined. */
227
+ ruby_aggregate_state *state = (ruby_aggregate_state *)state_p;
228
+ state->ruby_state = Qnil;
229
+ state->state_id = 0;
230
+ return;
231
+ }
232
+
233
+ arg.ctx = ctx;
234
+ arg.info = info;
235
+ arg.state_p = state_p;
236
+
237
+ rbduckdb_function_executor_dispatch(execute_init_callback_protected, &arg);
238
+ }
239
+
240
+ /* No-op update: used when no update_proc has been supplied. */
241
+ static void noop_update_callback(duckdb_function_info info,
242
+ duckdb_data_chunk input,
243
+ duckdb_aggregate_state *states) {
244
+ (void)info;
245
+ (void)input;
246
+ (void)states;
247
+ }
248
+
249
+ /* update callback dispatch argument */
250
+ struct update_callback_arg {
251
+ rubyDuckDBAggregateFunction *ctx;
252
+ duckdb_function_info info;
253
+ duckdb_data_chunk input;
254
+ duckdb_aggregate_state *states;
255
+ duckdb_vector *input_vectors;
256
+ duckdb_logical_type *input_types;
257
+ VALUE *args;
258
+ idx_t row_count;
259
+ idx_t col_count;
260
+ };
261
+
262
+ struct update_one_arg {
263
+ VALUE update_proc;
264
+ int argc;
265
+ VALUE *argv;
266
+ };
267
+
268
+ static VALUE call_update_proc(VALUE varg) {
269
+ struct update_one_arg *arg = (struct update_one_arg *)varg;
270
+ return rb_funcallv(arg->update_proc, rb_intern("call"), arg->argc, arg->argv);
271
+ }
272
+
273
+ /*
274
+ * Body of the update callback: allocate input buffers, walk each row,
275
+ * dispatch to the user's update_proc. Runs inside rb_ensure so that
276
+ * update_cleanup_callback always runs — even if rbduckdb_vector_value_at
277
+ * or the Ruby proc call raises, allocated buffers and logical types are
278
+ * released on the unwind path.
279
+ *
280
+ * Ruby exceptions raised by the user's proc are caught inline via
281
+ * rb_protect and reported to DuckDB as scalar errors; other Ruby
282
+ * exceptions (e.g. from vector_value_at) propagate and are cleaned up
283
+ * by rb_ensure.
284
+ */
285
+ static VALUE update_process_rows(VALUE varg) {
286
+ struct update_callback_arg *arg = (struct update_callback_arg *)varg;
287
+ ruby_aggregate_state **states = (ruby_aggregate_state **)arg->states;
288
+ idx_t i, j;
289
+
290
+ arg->input_vectors = ALLOC_N(duckdb_vector, arg->col_count);
291
+ arg->input_types = ALLOC_N(duckdb_logical_type, arg->col_count);
292
+ arg->args = ALLOC_N(VALUE, arg->col_count + 1);
293
+
294
+ for (j = 0; j < arg->col_count; j++) {
295
+ arg->input_vectors[j] = duckdb_data_chunk_get_vector(arg->input, j);
296
+ arg->input_types[j] = duckdb_vector_get_column_type(arg->input_vectors[j]);
297
+ }
298
+
299
+ for (i = 0; i < arg->row_count; i++) {
300
+ ruby_aggregate_state *state = states[i];
301
+ struct update_one_arg one;
302
+ int exception_state;
303
+ VALUE ret;
304
+
305
+ /*
306
+ * Without set_special_handling, DuckDB's default behaviour is to
307
+ * skip rows where any input value is NULL. Check the validity mask
308
+ * of every input column and skip the row if any value is invalid.
309
+ * When special_handling is enabled the callback receives all rows,
310
+ * including those with NULL inputs.
311
+ */
312
+ if (!arg->ctx->special_handling) {
313
+ int has_null = 0;
314
+ for (j = 0; j < arg->col_count; j++) {
315
+ uint64_t *validity = duckdb_vector_get_validity(arg->input_vectors[j]);
316
+ if (validity && !duckdb_validity_row_is_valid(validity, i)) {
317
+ has_null = 1;
318
+ break;
319
+ }
320
+ }
321
+ if (has_null) {
322
+ continue;
323
+ }
324
+ }
325
+
326
+ arg->args[0] = state->ruby_state;
327
+ for (j = 0; j < arg->col_count; j++) {
328
+ arg->args[j + 1] = rbduckdb_vector_value_at(arg->input_vectors[j], arg->input_types[j], i);
329
+ }
330
+
331
+ one.update_proc = arg->ctx->update_proc;
332
+ one.argc = (int)(arg->col_count + 1);
333
+ one.argv = arg->args;
334
+
335
+ ret = rb_protect(call_update_proc, (VALUE)&one, &exception_state);
336
+ if (exception_state) {
337
+ report_ruby_error_to_duckdb(arg->info);
338
+ /*
339
+ * DuckDB does not call the destroy callback on the update error
340
+ * path, so we must remove reachable states from the registry
341
+ * ourselves to avoid leaking Ruby VALUEs. Iterate all rows in
342
+ * the chunk — multiple rows may share the same state (same
343
+ * group), but state_registry_remove is idempotent.
344
+ */
345
+ for (j = 0; j < arg->row_count; j++) {
346
+ state_registry_remove(states[j]);
347
+ }
348
+ return Qnil;
349
+ }
350
+
351
+ state->ruby_state = ret;
352
+ state_registry_store(state, ret);
353
+ }
354
+
355
+ return Qnil;
356
+ }
357
+
358
+ static VALUE update_cleanup_callback(VALUE varg) {
359
+ struct update_callback_arg *arg = (struct update_callback_arg *)varg;
360
+ idx_t j;
361
+
362
+ if (arg->input_types != NULL) {
363
+ for (j = 0; j < arg->col_count; j++) {
364
+ duckdb_destroy_logical_type(&arg->input_types[j]);
365
+ }
366
+ xfree(arg->input_types);
367
+ }
368
+ if (arg->args != NULL) {
369
+ xfree(arg->args);
370
+ }
371
+ if (arg->input_vectors != NULL) {
372
+ xfree(arg->input_vectors);
373
+ }
374
+
375
+ return Qnil;
376
+ }
377
+
378
+ static void execute_update_callback_protected(void *user_data) {
379
+ struct update_callback_arg *arg = (struct update_callback_arg *)user_data;
380
+ rb_ensure(update_process_rows, (VALUE)arg, update_cleanup_callback, (VALUE)arg);
381
+ }
382
+
383
+ static void update_callback(duckdb_function_info info,
384
+ duckdb_data_chunk input,
385
+ duckdb_aggregate_state *states) {
386
+ rubyDuckDBAggregateFunction *ctx;
387
+ struct update_callback_arg arg;
388
+
389
+ ctx = (rubyDuckDBAggregateFunction *)duckdb_aggregate_function_get_extra_info(info);
390
+ if (ctx == NULL || ctx->update_proc == Qnil) {
391
+ return;
392
+ }
393
+
394
+ arg.ctx = ctx;
395
+ arg.info = info;
396
+ arg.input = input;
397
+ arg.states = states;
398
+ arg.input_vectors = NULL;
399
+ arg.input_types = NULL;
400
+ arg.args = NULL;
401
+ arg.row_count = duckdb_data_chunk_get_size(input);
402
+ arg.col_count = duckdb_data_chunk_get_column_count(input);
403
+
404
+ rbduckdb_function_executor_dispatch(execute_update_callback_protected, &arg);
405
+ }
406
+
407
+ /* No-op combine: Phase 1.0 does not dispatch combine to Ruby. */
408
+ static void noop_combine_callback(duckdb_function_info info,
409
+ duckdb_aggregate_state *source,
410
+ duckdb_aggregate_state *target,
411
+ idx_t count) {
412
+ (void)info;
413
+ (void)source;
414
+ (void)target;
415
+ (void)count;
416
+ }
417
+
418
+ /*
419
+ * Fallback combine used when update_proc is supplied but the user did not
420
+ * register a combine_proc via set_combine.
421
+ *
422
+ * DuckDB invokes combine even for single-partition aggregates: after update
423
+ * has accumulated values into a source state, DuckDB freshly initialises a
424
+ * target state and calls combine to merge source into target before finalize.
425
+ *
426
+ * Without a user-provided combine_proc we cannot perform an arbitrary merge,
427
+ * so this minimal implementation overwrites target->ruby_state with the
428
+ * source value. This is correct for the common single-group/single-thread
429
+ * path; parallel execution requires the user to supply a combine_proc via
430
+ * set_combine, in which case combine_callback is wired instead of this
431
+ * fallback.
432
+ */
433
+ static void default_combine_callback(duckdb_function_info info,
434
+ duckdb_aggregate_state *source,
435
+ duckdb_aggregate_state *target,
436
+ idx_t count) {
437
+ ruby_aggregate_state **src = (ruby_aggregate_state **)source;
438
+ ruby_aggregate_state **tgt = (ruby_aggregate_state **)target;
439
+ idx_t i;
440
+ (void)info;
441
+
442
+ for (i = 0; i < count; i++) {
443
+ tgt[i]->ruby_state = src[i]->ruby_state;
444
+ /*
445
+ * Do NOT call any Ruby API here. This callback is invoked by a
446
+ * DuckDB worker thread that does not hold the GVL; any rb_* call
447
+ * from this context is unsafe and causes a SIGSEGV on Windows.
448
+ *
449
+ * The copied VALUE is already GC-protected via the source state's
450
+ * existing registry entry — which shares the same state_id (because
451
+ * DuckDB memcpy'd the buffer). The destructor callback will clean
452
+ * up that entry when DuckDB frees the source state.
453
+ */
454
+ }
455
+ }
456
+
457
+ /* combine_callback dispatch argument */
458
+ struct combine_callback_arg {
459
+ rubyDuckDBAggregateFunction *ctx;
460
+ duckdb_function_info info;
461
+ duckdb_aggregate_state *source;
462
+ duckdb_aggregate_state *target;
463
+ idx_t count;
464
+ };
465
+
466
+ struct combine_one_arg {
467
+ VALUE combine_proc;
468
+ VALUE source_state;
469
+ VALUE target_state;
470
+ };
471
+
472
+ static VALUE call_combine_proc(VALUE varg) {
473
+ struct combine_one_arg *arg = (struct combine_one_arg *)varg;
474
+ VALUE argv[2];
475
+ argv[0] = arg->source_state;
476
+ argv[1] = arg->target_state;
477
+ return rb_funcallv(arg->combine_proc, rb_intern("call"), 2, argv);
478
+ }
479
+
480
+ static void execute_combine_callback_protected(void *user_data) {
481
+ struct combine_callback_arg *arg = (struct combine_callback_arg *)user_data;
482
+ ruby_aggregate_state **src = (ruby_aggregate_state **)arg->source;
483
+ ruby_aggregate_state **tgt = (ruby_aggregate_state **)arg->target;
484
+ idx_t i;
485
+
486
+ for (i = 0; i < arg->count; i++) {
487
+ struct combine_one_arg one;
488
+ int exception_state;
489
+ VALUE ret;
490
+
491
+ one.combine_proc = arg->ctx->combine_proc;
492
+ one.source_state = src[i]->ruby_state;
493
+ one.target_state = tgt[i]->ruby_state;
494
+
495
+ ret = rb_protect(call_combine_proc, (VALUE)&one, &exception_state);
496
+ if (exception_state) {
497
+ report_ruby_error_to_duckdb(arg->info);
498
+ return;
499
+ }
500
+
501
+ tgt[i]->ruby_state = ret;
502
+ state_registry_store(tgt[i], ret);
503
+
504
+ /* source state is consumed by combine; release its registry entry
505
+ * so the Ruby VALUE can be GC'd. */
506
+ state_registry_remove(src[i]);
507
+ }
508
+ }
509
+
510
+ static void combine_callback(duckdb_function_info info,
511
+ duckdb_aggregate_state *source,
512
+ duckdb_aggregate_state *target,
513
+ idx_t count) {
514
+ rubyDuckDBAggregateFunction *ctx;
515
+ struct combine_callback_arg arg;
516
+
517
+ ctx = (rubyDuckDBAggregateFunction *)duckdb_aggregate_function_get_extra_info(info);
518
+ if (ctx == NULL || ctx->combine_proc == Qnil) {
519
+ return;
520
+ }
521
+
522
+ arg.ctx = ctx;
523
+ arg.info = info;
524
+ arg.source = source;
525
+ arg.target = target;
526
+ arg.count = count;
527
+
528
+ rbduckdb_function_executor_dispatch(execute_combine_callback_protected, &arg);
529
+ }
530
+
531
+ /* finalize callback dispatch argument */
532
+ struct finalize_callback_arg {
533
+ rubyDuckDBAggregateFunction *ctx;
534
+ duckdb_function_info info;
535
+ duckdb_aggregate_state *source_p;
536
+ duckdb_vector result;
537
+ idx_t count;
538
+ idx_t offset;
539
+ };
540
+
541
+ struct finalize_one_arg {
542
+ VALUE finalize_proc;
543
+ VALUE ruby_state;
544
+ };
545
+
546
+ static VALUE call_finalize_proc(VALUE varg) {
547
+ struct finalize_one_arg *arg = (struct finalize_one_arg *)varg;
548
+ return rb_funcall(arg->finalize_proc, rb_intern("call"), 1, arg->ruby_state);
549
+ }
550
+
551
+ struct vector_set_arg {
552
+ duckdb_vector vector;
553
+ duckdb_logical_type element_type;
554
+ idx_t index;
555
+ VALUE value;
556
+ };
557
+
558
+ static VALUE call_vector_set_value_at(VALUE varg) {
559
+ struct vector_set_arg *a = (struct vector_set_arg *)varg;
560
+ rbduckdb_vector_set_value_at(a->vector, a->element_type, a->index, a->value);
561
+ return Qnil;
562
+ }
563
+
564
+ static void execute_finalize_callback_protected(void *user_data) {
565
+ struct finalize_callback_arg *arg = (struct finalize_callback_arg *)user_data;
566
+ ruby_aggregate_state **states = (ruby_aggregate_state **)arg->source_p;
567
+ duckdb_logical_type result_type = duckdb_vector_get_column_type(arg->result);
568
+ idx_t i;
569
+
570
+ for (i = 0; i < arg->count; i++) {
571
+ ruby_aggregate_state *state = states[i];
572
+ struct finalize_one_arg one;
573
+ struct vector_set_arg vsa;
574
+ int exception_state;
575
+ VALUE ret;
576
+
577
+ one.finalize_proc = arg->ctx->finalize_proc;
578
+ one.ruby_state = state->ruby_state;
579
+
580
+ ret = rb_protect(call_finalize_proc, (VALUE)&one, &exception_state);
581
+ if (exception_state) {
582
+ report_ruby_error_to_duckdb(arg->info);
583
+ goto cleanup;
584
+ }
585
+
586
+ vsa.vector = arg->result;
587
+ vsa.element_type = result_type;
588
+ vsa.index = arg->offset + i;
589
+ vsa.value = ret;
590
+
591
+ rb_protect(call_vector_set_value_at, (VALUE)&vsa, &exception_state);
592
+ if (exception_state) {
593
+ report_ruby_error_to_duckdb(arg->info);
594
+ goto cleanup;
595
+ }
596
+
597
+ /* Release Ruby state from the GC registry. */
598
+ state_registry_remove(state);
599
+ }
600
+
601
+ cleanup:
602
+ /* Clean up registry entries for the current (failed) state and any
603
+ remaining unprocessed states so we don't leak GC-registered objects. */
604
+ for (; i < arg->count; i++) {
605
+ state_registry_remove(states[i]);
606
+ }
607
+ duckdb_destroy_logical_type(&result_type);
608
+ }
609
+
610
+ static void finalize_callback(duckdb_function_info info,
611
+ duckdb_aggregate_state *source,
612
+ duckdb_vector result,
613
+ idx_t count,
614
+ idx_t offset) {
615
+ rubyDuckDBAggregateFunction *ctx;
616
+ struct finalize_callback_arg arg;
617
+
618
+ ctx = (rubyDuckDBAggregateFunction *)duckdb_aggregate_function_get_extra_info(info);
619
+ if (ctx == NULL || ctx->finalize_proc == Qnil) {
620
+ return;
621
+ }
622
+
623
+ arg.ctx = ctx;
624
+ arg.info = info;
625
+ arg.source_p = source;
626
+ arg.result = result;
627
+ arg.count = count;
628
+ arg.offset = offset;
629
+
630
+ rbduckdb_function_executor_dispatch(execute_finalize_callback_protected, &arg);
631
+ }
632
+
633
+ /* destroy_callback dispatch argument */
634
+ struct destroy_callback_arg {
635
+ duckdb_aggregate_state *states;
636
+ idx_t count;
637
+ };
638
+
639
+ static void execute_destroy_callback(void *data) {
640
+ struct destroy_callback_arg *arg = (struct destroy_callback_arg *)data;
641
+ ruby_aggregate_state **s = (ruby_aggregate_state **)arg->states;
642
+ idx_t i;
643
+ for (i = 0; i < arg->count; i++) {
644
+ state_registry_remove(s[i]);
645
+ }
646
+ }
647
+
648
+ /*
649
+ * Called by DuckDB when it frees aggregate state buffers. On success paths
650
+ * this runs after finalize has already removed the final-state entries, so
651
+ * the delete is a harmless no-op for those; for intermediate states created
652
+ * by DuckDB's internal memcpy, this is the only cleanup path.
653
+ *
654
+ * Dispatches through the executor thread so that rb_hash_delete is called
655
+ * with the GVL held.
656
+ *
657
+ * The executor thread is guaranteed to be running because
658
+ * maybe_set_functions() calls rbduckdb_function_executor_ensure_started()
659
+ * before registering this destructor.
660
+ */
661
+ static void destroy_callback(duckdb_aggregate_state *states, idx_t count) {
662
+ struct destroy_callback_arg arg;
663
+ arg.states = states;
664
+ arg.count = count;
665
+ rbduckdb_function_executor_dispatch(execute_destroy_callback, &arg);
666
+ }
667
+
668
+ /*
669
+ * Wire up all 5 DuckDB aggregate callbacks on the underlying aggregate_function.
670
+ * Called once both init_proc and finalize_proc have been supplied.
671
+ */
672
+ static void maybe_set_functions(rubyDuckDBAggregateFunction *p) {
673
+ if (p->init_proc == Qnil || p->finalize_proc == Qnil) {
674
+ return;
675
+ }
676
+ duckdb_aggregate_function_set_extra_info(p->aggregate_function, p, NULL);
677
+ duckdb_aggregate_function_set_functions(
678
+ p->aggregate_function,
679
+ state_size_callback,
680
+ state_init_callback,
681
+ (p->update_proc != Qnil) ? update_callback : noop_update_callback,
682
+ (p->combine_proc != Qnil) ? combine_callback :
683
+ ((p->update_proc != Qnil) ? default_combine_callback : noop_combine_callback),
684
+ finalize_callback);
685
+ duckdb_aggregate_function_set_destructor(p->aggregate_function, destroy_callback);
686
+
687
+ /* Ensure the global executor thread is running for multi-thread dispatch.
688
+ * Deferred until callbacks are actually wired to DuckDB. */
689
+ rbduckdb_function_executor_ensure_started();
690
+ }
691
+
692
+ /* :nodoc: */
693
+ static VALUE rbduckdb_aggregate_function_set_init(VALUE self) {
694
+ rubyDuckDBAggregateFunction *p;
695
+
696
+ if (!rb_block_given_p()) {
697
+ rb_raise(rb_eArgError, "block is required");
698
+ }
699
+
700
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
701
+ p->init_proc = rb_block_proc();
702
+
703
+ maybe_set_functions(p);
704
+
705
+ return self;
706
+ }
707
+
708
+ /* :nodoc: */
709
+ static VALUE rbduckdb_aggregate_function_set_update(VALUE self) {
710
+ rubyDuckDBAggregateFunction *p;
711
+
712
+ if (!rb_block_given_p()) {
713
+ rb_raise(rb_eArgError, "block is required");
714
+ }
715
+
716
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
717
+ p->update_proc = rb_block_proc();
718
+
719
+ maybe_set_functions(p);
720
+
721
+ return self;
722
+ }
723
+
724
+ /* :nodoc: */
725
+ static VALUE rbduckdb_aggregate_function_set_combine(VALUE self) {
726
+ rubyDuckDBAggregateFunction *p;
727
+
728
+ if (!rb_block_given_p()) {
729
+ rb_raise(rb_eArgError, "block is required");
730
+ }
731
+
732
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
733
+ p->combine_proc = rb_block_proc();
734
+
735
+ maybe_set_functions(p);
736
+
737
+ return self;
738
+ }
739
+
740
+ /* :nodoc: */
741
+ static VALUE rbduckdb_aggregate_function_set_finalize(VALUE self) {
742
+ rubyDuckDBAggregateFunction *p;
743
+
744
+ if (!rb_block_given_p()) {
745
+ rb_raise(rb_eArgError, "block is required");
746
+ }
747
+
748
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
749
+ p->finalize_proc = rb_block_proc();
750
+
751
+ maybe_set_functions(p);
752
+
753
+ return self;
754
+ }
755
+
756
+ /* :nodoc: */
757
+ static VALUE rbduckdb_aggregate_function__set_special_handling(VALUE self) {
758
+ rubyDuckDBAggregateFunction *p;
759
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunction, &aggregate_function_data_type, p);
760
+ p->special_handling = true;
761
+ duckdb_aggregate_function_set_special_handling(p->aggregate_function);
762
+ return self;
763
+ }
764
+
765
+ /* Returns the number of Ruby states currently tracked in the registry. */
766
+ static VALUE aggregate_function_state_registry_size(VALUE klass) {
767
+ (void)klass;
768
+ return LONG2NUM((long)RHASH_SIZE(g_aggregate_state_registry));
769
+ }
770
+
771
+ void rbduckdb_init_duckdb_aggregate_function(void) {
772
+ #if 0
773
+ VALUE mDuckDB = rb_define_module("DuckDB");
774
+ #endif
775
+ cDuckDBAggregateFunction = rb_define_class_under(mDuckDB, "AggregateFunction", rb_cObject);
776
+ rb_define_alloc_func(cDuckDBAggregateFunction, allocate);
777
+ rb_define_method(cDuckDBAggregateFunction, "initialize", duckdb_aggregate_function_initialize, 0);
778
+ rb_define_method(cDuckDBAggregateFunction, "name=", rbduckdb_aggregate_function_set_name, 1);
779
+ rb_define_private_method(cDuckDBAggregateFunction, "_set_return_type", rbduckdb_aggregate_function__set_return_type, 1);
780
+ rb_define_private_method(cDuckDBAggregateFunction, "_add_parameter", rbduckdb_aggregate_function_add_parameter, 1);
781
+ rb_define_method(cDuckDBAggregateFunction, "set_init", rbduckdb_aggregate_function_set_init, 0);
782
+ rb_define_method(cDuckDBAggregateFunction, "set_update", rbduckdb_aggregate_function_set_update, 0);
783
+ rb_define_method(cDuckDBAggregateFunction, "set_combine", rbduckdb_aggregate_function_set_combine, 0);
784
+ rb_define_method(cDuckDBAggregateFunction, "set_finalize", rbduckdb_aggregate_function_set_finalize, 0);
785
+ rb_define_private_method(cDuckDBAggregateFunction, "_set_special_handling", rbduckdb_aggregate_function__set_special_handling, 0);
786
+ rb_define_singleton_method(cDuckDBAggregateFunction, "_state_registry_size",
787
+ aggregate_function_state_registry_size, 0);
788
+
789
+ g_aggregate_state_registry = rb_hash_new();
790
+ rb_gc_register_mark_object(g_aggregate_state_registry);
791
+ }