duckdb 1.5.2.1 → 1.5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +40 -0
  3. data/README.md +52 -0
  4. data/ext/duckdb/aggregate_function.c +1 -1
  5. data/ext/duckdb/aggregate_function_set.c +86 -0
  6. data/ext/duckdb/aggregate_function_set.h +14 -0
  7. data/ext/duckdb/appender.c +62 -4
  8. data/ext/duckdb/arrow_array_stream.c +226 -0
  9. data/ext/duckdb/arrow_array_stream.h +61 -0
  10. data/ext/duckdb/arrow_import.c +165 -0
  11. data/ext/duckdb/arrow_import.h +6 -0
  12. data/ext/duckdb/blob.c +1 -1
  13. data/ext/duckdb/blob.h +1 -2
  14. data/ext/duckdb/config.c +1 -1
  15. data/ext/duckdb/config.h +1 -1
  16. data/ext/duckdb/connection.c +26 -3
  17. data/ext/duckdb/converter.h +1 -0
  18. data/ext/duckdb/conveter.c +39 -9
  19. data/ext/duckdb/data_chunk.c +10 -0
  20. data/ext/duckdb/data_chunk.h +1 -0
  21. data/ext/duckdb/duckdb.c +14 -11
  22. data/ext/duckdb/error.c +1 -1
  23. data/ext/duckdb/error.h +1 -3
  24. data/ext/duckdb/extconf.rb +28 -13
  25. data/ext/duckdb/function_executor.c +308 -2
  26. data/ext/duckdb/function_executor.h +44 -0
  27. data/ext/duckdb/prepared_statement.c +38 -0
  28. data/ext/duckdb/result.c +49 -53
  29. data/ext/duckdb/result.h +11 -0
  30. data/ext/duckdb/ruby-duckdb.h +4 -0
  31. data/ext/duckdb/scalar_function.c +97 -29
  32. data/ext/duckdb/scalar_function.h +2 -4
  33. data/ext/duckdb/scalar_function_bind_info.c +13 -13
  34. data/ext/duckdb/scalar_function_bind_info.h +1 -1
  35. data/ext/duckdb/scalar_function_set.c +9 -9
  36. data/ext/duckdb/scalar_function_set.h +2 -2
  37. data/ext/duckdb/table_description.c +19 -19
  38. data/ext/duckdb/table_description.h +1 -1
  39. data/ext/duckdb/table_function.c +94 -28
  40. data/ext/duckdb/table_function.h +2 -2
  41. data/ext/duckdb/table_function_bind_info.c +20 -20
  42. data/ext/duckdb/table_function_bind_info.h +2 -2
  43. data/ext/duckdb/table_function_function_info.c +5 -5
  44. data/ext/duckdb/table_function_function_info.h +2 -2
  45. data/ext/duckdb/table_function_init_info.c +70 -5
  46. data/ext/duckdb/table_function_init_info.h +2 -2
  47. data/lib/duckdb/aggregate_function.rb +7 -1
  48. data/lib/duckdb/aggregate_function_set.rb +29 -0
  49. data/lib/duckdb/appender.rb +97 -0
  50. data/lib/duckdb/arrow_array_stream.rb +33 -0
  51. data/lib/duckdb/connection.rb +139 -9
  52. data/lib/duckdb/prepared_statement.rb +35 -0
  53. data/lib/duckdb/result.rb +39 -2
  54. data/lib/duckdb/scalar_function.rb +9 -4
  55. data/lib/duckdb/scalar_function_set.rb +0 -1
  56. data/lib/duckdb/table_description.rb +7 -0
  57. data/lib/duckdb/table_name_parser.rb +58 -0
  58. data/lib/duckdb/version.rb +1 -1
  59. data/lib/duckdb.rb +3 -0
  60. metadata +11 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c7c57e0282ec2213b1efb3844445804e5fbbc09e03496f2ba71c1bd24152843
4
- data.tar.gz: 9057fa45fafeba0dc2110562f98cd49eecf53ec0f0a48a989756742525c82b4a
3
+ metadata.gz: c9c37c59f2e64ac10b72765250ec674ef40e82d83a7499a6c9e1117ee8e9e9d0
4
+ data.tar.gz: e9510873a0de444b35bb7c3de2d26f8d9a9ebcd01466a95383fa16d38d277e97
5
5
  SHA512:
6
- metadata.gz: 78bbd166f587491085267791ffca949f2b310597f802424b7e0525c8c4f35f345c2362b17b33401bee374601d5f52746d10ac21f60ea8344bd3bf08e28d7f4b5
7
- data.tar.gz: 6fc88b5e289ef3451691f57cb404c390b9deabbafddf81e8aedd300121276a9df62b0492559f2602eb4aeaa57ed2a83cc98ccfbef3a728771c34a565a022f78a
6
+ metadata.gz: 2ab204dc5d6034b846d3267ca78fd4d47f75f000e3d6c97e5673ca9791618c84e7130230cd1ad7634858ff25fd5cc5952bac500263ef228225767a730684749c
7
+ data.tar.gz: f55e20a3dc6a97aea0606dca4b047746da44941c4f84145a48c45248d150fd58ef800ca617adc7921ae6418ac0f8275bf29e24405f3cdbf2e917377029b8641f
data/CHANGELOG.md CHANGED
@@ -4,6 +4,46 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  # Unreleased
6
6
 
7
+ # 1.5.4.0 - 2026-06-20
8
+ - bump DuckDB to 1.5.4 and 1.4.5 on CI.
9
+ - add experimental `DuckDB::Result#arrow_c_stream` returning `DuckDB::ArrowArrayStream` to export a query result as an Arrow C stream (Arrow C Data Interface). The stream can be consumed directly by ruby-polars (`Polars::DataFrame.new(result)`) and red-arrow (`Arrow::RecordBatchReader.import(stream.to_i)`).
10
+ - add experimental `DuckDB::Connection#append_arrow(table, producer)` to import an Arrow producer (any object responding to `#arrow_c_stream`, such as a Polars `DataFrame` or a `DuckDB::Result`) into an existing table, returning the number of rows appended.
11
+ - drop support for Ruby 3.2.
12
+ - add `DuckDB::TableFunction::InitInfo#max_threads=` (and `#set_max_threads`) to hint DuckDB how many worker threads can execute a custom table function concurrently.
13
+ - add `DuckDB::TableFunction::InitInfo#column_count` to get the number of projected result columns of a custom table function scan.
14
+ - add `DuckDB::TableFunction::InitInfo#column_index` to get the source column index of a given projected result column of a custom table function scan.
15
+ - add `DuckDB::Appender#append_uuid(value)` to append a UUID value to a `UUID` column. `value` must be a String in canonical UUID format (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`).
16
+ - add `DuckDB::PreparedStatement#bind_uuid(index, value)` to bind a UUID parameter. `value` must be a String in canonical UUID format.
17
+
18
+ # 1.5.3.0 - 2026-05-24
19
+ - bump DuckDB to 1.5.3 on CI.
20
+ - add `DuckDB::AggregateFunctionSet` class and `DuckDB::Connection#register_aggregate_function_set` to register multiple overloads of a custom aggregate function under one SQL name.
21
+ - add `DuckDB::Appender#append_default_to_chunk`.
22
+ - add `DuckDB::TableNameParser` module with shared table name parsing logic (quoting and dot-notation), included by `DuckDB::Appender` and `DuckDB::TableDescription`.
23
+ - add `DuckDB::PreparedStatement#bind_timestamp_tz` to bind a `TIMESTAMP WITH TIME ZONE` (TIMESTAMPTZ) parameter from a `Time` or timestamp string.
24
+ - `DuckDB::AggregateFunction#name=` and `DuckDB::ScalarFunction#name=` now accept Symbol arguments (coerced to String).
25
+ ## Breaking Changes
26
+ - `DuckDB::ScalarFunction.create`: `name:` is now a required keyword argument (previously optional with `nil` default). Parameter order changed to `name:, return_type:, ...`.
27
+ - `DuckDB::ScalarFunctionSet#add`: no longer overrides the scalar function's name with the set's name. The individual function must have its own name set before being added to the set.
28
+ - `DuckDB::TableDescription.new`: the 2nd argument now parses dot-notation and quoting:
29
+ - `'schema.table'` is interpreted as schema-qualified (deprecated; use `schema:` keyword instead).
30
+ - `'"schema.table"'` or `"'schema.table'"` — quotes are stripped and the name is treated as a literal table name containing a dot.
31
+ - `DuckDB::Appender.new`: the 2nd argument now parses dot-notation and quoting (previously only `Connection#appender` did this):
32
+ - `'schema.table'` is interpreted as schema-qualified (deprecated; use `schema:` keyword instead).
33
+ - `'"schema.table"'` or `"'schema.table'"` — quotes are stripped and the name is treated as a literal table name containing a dot.
34
+ - `DuckDB::Connection#appender`: table name parsing (dot-notation, quoting) is now delegated to `DuckDB::Appender.new` internally. Behavior is unchanged — `'schema.table'` has always split on dot in `Connection#appender`. No deprecation warning is emitted.
35
+ - `DuckDB::Connection#appender`: table names surrounded by double or single quotes (e.g. `'"a.b"'` or `"'a.b'"`) are treated as literal table names — the quotes are stripped and no dot-splitting is performed.
36
+ - fix: `Connection#appender('a.b')` no longer emits a misleading deprecation warning (the behavior was not deprecated — it was already the correct behavior).
37
+ - add `DuckDB::Appender.new(con, table, schema: nil, catalog: nil)` keyword argument form.
38
+ - add `DuckDB::Connection#appender(table, schema: nil, catalog: nil)` keyword argument form.
39
+ - deprecate `DuckDB::Result#_column_type(i)` private method. Use `columns[i].send(:_type)` instead.
40
+ - `DuckDB::Result#enum_dictionary_values` now checks for an invalid column index.
41
+ - deprecate `DuckDB::Result#_enum_dictionary_size` private method.
42
+ - deprecate `DuckDB::Result#_enum_dictionary_value` private method.
43
+ ## Deprecations
44
+ - deprecate `DuckDB::Appender.new(con, schema, table)` 3-positional-argument form. Use `DuckDB::Appender.new(con, table, schema: schema)` instead.
45
+ - deprecate passing dot-notation string (e.g. `'schema.table'`) directly to `DuckDB::Appender.new`. Use `DuckDB::Appender.new(con, table, schema: schema)` instead.
46
+
7
47
  # 1.5.2.1 - 2026-04-24
8
48
 
9
49
  - add `DuckDB::AggregateFunction.create`.
data/README.md CHANGED
@@ -266,6 +266,58 @@ res.first.first # => 4
266
266
 
267
267
  Set `DuckDB.default_timezone` to control how TIMESTAMP and TIME values without time zone are converted to Ruby `Time` objects. The default is `:local`, but you can use `:utc` for UTC conversion.
268
268
 
269
+ ### Arrow interop (experimental)
270
+
271
+ `DuckDB::Result#arrow_c_stream` exports a query result as an Arrow C stream
272
+ ([Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)).
273
+ The returned `DuckDB::ArrowArrayStream` satisfies the Ruby Arrow C stream
274
+ protocol (`#arrow_c_stream` / `#to_i`), so query results can be handed to
275
+ Arrow consumers in columnar form, without converting each row to Ruby objects.
276
+
277
+ With [ruby-polars](https://github.com/ankane/ruby-polars), no glue code is needed:
278
+
279
+ ```ruby
280
+ result = con.query('SELECT * FROM users')
281
+ df = Polars::DataFrame.new(result)
282
+ ```
283
+
284
+ With [red-arrow](https://github.com/apache/arrow/tree/main/ruby/red-arrow),
285
+ pass the stream address to `Arrow::RecordBatchReader.import`:
286
+
287
+ ```ruby
288
+ result = con.query('SELECT * FROM users')
289
+ reader = Arrow::RecordBatchReader.import(result.arrow_c_stream.to_i)
290
+ ```
291
+
292
+ The consumer takes ownership of the stream's contents, so a result can be
293
+ exported only once; exporting the same result again raises `DuckDB::Error`.
294
+
295
+ In the other direction, `DuckDB::Connection#append_arrow` imports an Arrow
296
+ producer into an existing table. Any object responding to `#arrow_c_stream`
297
+ works as the producer — for example a Polars `DataFrame`, or another
298
+ `DuckDB::Result`:
299
+
300
+ ```ruby
301
+ con.query('CREATE TABLE users (id BIGINT, name VARCHAR)')
302
+ rows = con.append_arrow('users', polars_df) # => number of rows appended
303
+ con.query('SELECT * FROM users').to_a
304
+ ```
305
+
306
+ The producer's columns must line up with the table's columns by count and
307
+ position. DuckDB casts compatible column types (e.g. INTEGER into a BIGINT
308
+ column); a type that cannot be cast raises `DuckDB::Error`. `append_arrow` is
309
+ not transactional — wrap it in your own transaction if you need all-or-nothing.
310
+
311
+ These features are **experimental**: they are built on DuckDB's unstable Arrow
312
+ C API and may change in any minor release.
313
+
314
+ Note: [red-arrow-format](https://github.com/apache/arrow/tree/main/ruby/red-arrow-format)
315
+ (the pure-Ruby Arrow implementation) supports only the Arrow IPC
316
+ serialization format, not the C Data Interface. To exchange data with it,
317
+ write/read Arrow IPC files through DuckDB's
318
+ [arrow community extension](https://duckdb.org/community_extensions/extensions/arrow.html)
319
+ (`COPY ... TO 'data.arrows'` / `read_arrow(...)`).
320
+
269
321
  ## Versioning and DuckDB support
270
322
 
271
323
  The first three digits of the gem version track the DuckDB release that the
@@ -736,7 +736,7 @@ void rbduckdb_init_aggregate_function(void) {
736
736
  cDuckDBAggregateFunction = rb_define_class_under(mDuckDB, "AggregateFunction", rb_cObject);
737
737
  rb_define_alloc_func(cDuckDBAggregateFunction, allocate);
738
738
  rb_define_method(cDuckDBAggregateFunction, "initialize", aggregate_function_initialize, 0);
739
- rb_define_method(cDuckDBAggregateFunction, "name=", aggregate_function_set_name, 1);
739
+ rb_define_method(cDuckDBAggregateFunction, "set_name", aggregate_function_set_name, 1);
740
740
  rb_define_private_method(cDuckDBAggregateFunction, "_set_return_type", aggregate_function__set_return_type, 1);
741
741
  rb_define_private_method(cDuckDBAggregateFunction, "_add_parameter", aggregate_function__add_parameter, 1);
742
742
  rb_define_private_method(cDuckDBAggregateFunction, "_set_init", aggregate_function__set_init, 0);
@@ -0,0 +1,86 @@
1
+ #include "ruby-duckdb.h"
2
+
3
+ VALUE cDuckDBAggregateFunctionSet;
4
+
5
+ static void mark(void *);
6
+ static void deallocate(void *);
7
+ static VALUE allocate(VALUE klass);
8
+ static size_t memsize(const void *p);
9
+ static void compact(void *);
10
+ static VALUE aggregate_function_set__initialize(VALUE self, VALUE name);
11
+ static VALUE aggregate_function_set__add(VALUE self, VALUE aggregate_function);
12
+
13
+ static const rb_data_type_t aggregate_function_set_data_type = {
14
+ "DuckDB/AggregateFunctionSet",
15
+ {mark, deallocate, memsize, compact},
16
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
17
+ };
18
+
19
+ static void mark(void *ctx) {
20
+ rubyDuckDBAggregateFunctionSet *p = (rubyDuckDBAggregateFunctionSet *)ctx;
21
+ rb_gc_mark(p->functions);
22
+ }
23
+
24
+ static void deallocate(void *ctx) {
25
+ rubyDuckDBAggregateFunctionSet *p = (rubyDuckDBAggregateFunctionSet *)ctx;
26
+ duckdb_destroy_aggregate_function_set(&(p->aggregate_function_set));
27
+ xfree(p);
28
+ }
29
+
30
+ static void compact(void *ctx) {
31
+ rubyDuckDBAggregateFunctionSet *p = (rubyDuckDBAggregateFunctionSet *)ctx;
32
+ p->functions = rb_gc_location(p->functions);
33
+ }
34
+
35
+ static VALUE allocate(VALUE klass) {
36
+ rubyDuckDBAggregateFunctionSet *ctx = xcalloc((size_t)1, sizeof(rubyDuckDBAggregateFunctionSet));
37
+ VALUE obj = TypedData_Wrap_Struct(klass, &aggregate_function_set_data_type, ctx);
38
+ ctx->functions = rb_ary_new();
39
+ RB_GC_GUARD(ctx->functions);
40
+ return obj;
41
+ }
42
+
43
+ static size_t memsize(const void *p) {
44
+ return sizeof(rubyDuckDBAggregateFunctionSet);
45
+ }
46
+
47
+ rubyDuckDBAggregateFunctionSet *rbduckdb_get_struct_aggregate_function_set(VALUE obj) {
48
+ rubyDuckDBAggregateFunctionSet *ctx;
49
+ TypedData_Get_Struct(obj, rubyDuckDBAggregateFunctionSet, &aggregate_function_set_data_type, ctx);
50
+ return ctx;
51
+ }
52
+
53
+ /* :nodoc: */
54
+ static VALUE aggregate_function_set__initialize(VALUE self, VALUE name) {
55
+ rubyDuckDBAggregateFunctionSet *p;
56
+
57
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunctionSet, &aggregate_function_set_data_type, p);
58
+ p->aggregate_function_set = duckdb_create_aggregate_function_set(StringValueCStr(name));
59
+ return self;
60
+ }
61
+
62
+ /* :nodoc: */
63
+ static VALUE aggregate_function_set__add(VALUE self, VALUE aggregate_function) {
64
+ rubyDuckDBAggregateFunctionSet *p;
65
+ rubyDuckDBAggregateFunction *af;
66
+
67
+ TypedData_Get_Struct(self, rubyDuckDBAggregateFunctionSet, &aggregate_function_set_data_type, p);
68
+ af = rbduckdb_get_struct_aggregate_function(aggregate_function);
69
+
70
+ if (duckdb_add_aggregate_function_to_set(p->aggregate_function_set, af->aggregate_function) == DuckDBError) {
71
+ rb_raise(eDuckDBError, "failed to add aggregate function to set (duplicate overload?)");
72
+ }
73
+
74
+ rb_ary_push(p->functions, aggregate_function);
75
+ return self;
76
+ }
77
+
78
+ void rbduckdb_init_aggregate_function_set(void) {
79
+ #if 0
80
+ VALUE mDuckDB = rb_define_module("DuckDB");
81
+ #endif
82
+ cDuckDBAggregateFunctionSet = rb_define_class_under(mDuckDB, "AggregateFunctionSet", rb_cObject);
83
+ rb_define_alloc_func(cDuckDBAggregateFunctionSet, allocate);
84
+ rb_define_private_method(cDuckDBAggregateFunctionSet, "_initialize", aggregate_function_set__initialize, 1);
85
+ rb_define_private_method(cDuckDBAggregateFunctionSet, "_add", aggregate_function_set__add, 1);
86
+ }
@@ -0,0 +1,14 @@
1
+ #ifndef RUBY_DUCKDB_AGGREGATE_FUNCTION_SET_H
2
+ #define RUBY_DUCKDB_AGGREGATE_FUNCTION_SET_H
3
+
4
+ struct _rubyDuckDBAggregateFunctionSet {
5
+ duckdb_aggregate_function_set aggregate_function_set;
6
+ VALUE functions; /* Ruby Array of AggregateFunction objects — prevents GC collection */
7
+ };
8
+
9
+ typedef struct _rubyDuckDBAggregateFunctionSet rubyDuckDBAggregateFunctionSet;
10
+
11
+ void rbduckdb_init_aggregate_function_set(void);
12
+ rubyDuckDBAggregateFunctionSet *rbduckdb_get_struct_aggregate_function_set(VALUE obj);
13
+
14
+ #endif
@@ -9,7 +9,8 @@ static size_t memsize(const void *p);
9
9
 
10
10
  static VALUE appender_s_create_query(VALUE klass, VALUE con, VALUE query, VALUE types, VALUE table, VALUE columns);
11
11
 
12
- static VALUE appender_initialize(VALUE klass, VALUE con, VALUE schema, VALUE table);
12
+ static VALUE appender__initialize(VALUE self, VALUE con, VALUE schema, VALUE table);
13
+ static VALUE appender__initialize_ext(VALUE self, VALUE con, VALUE catalog, VALUE schema, VALUE table);
13
14
  static VALUE appender_error_message(VALUE self);
14
15
  static VALUE appender__append_bool(VALUE self, VALUE val);
15
16
  static VALUE appender__append_int8(VALUE self, VALUE val);
@@ -34,8 +35,10 @@ static VALUE appender__append_time(VALUE self, VALUE hour, VALUE min, VALUE sec,
34
35
  static VALUE appender__append_timestamp(VALUE self, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
35
36
  static VALUE appender__append_hugeint(VALUE self, VALUE lower, VALUE upper);
36
37
  static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper);
38
+ static VALUE appender__append_uuid(VALUE self, VALUE val);
37
39
  static VALUE appender__append_value(VALUE self, VALUE val);
38
40
  static VALUE appender__append_data_chunk(VALUE self, VALUE chunk);
41
+ static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row);
39
42
  static VALUE appender__flush(VALUE self);
40
43
 
41
44
  #ifdef HAVE_DUCKDB_H_GE_V1_5_0
@@ -130,14 +133,14 @@ static VALUE appender_s_create_query(VALUE klass, VALUE con, VALUE query, VALUE
130
133
  return appender;
131
134
  }
132
135
 
133
- static VALUE appender_initialize(VALUE self, VALUE con, VALUE schema, VALUE table) {
136
+ static VALUE appender__initialize(VALUE self, VALUE con, VALUE schema, VALUE table) {
134
137
 
135
138
  rubyDuckDBConnection *ctxcon;
136
139
  rubyDuckDBAppender *ctx;
137
140
  char *pschema = 0;
138
141
 
139
142
  if (!rb_obj_is_kind_of(con, cDuckDBConnection)) {
140
- rb_raise(rb_eTypeError, "1st argument should be instance of DackDB::Connection");
143
+ rb_raise(rb_eTypeError, "1st argument should be instance of DuckDB::Connection");
141
144
  }
142
145
 
143
146
  TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
@@ -153,6 +156,32 @@ static VALUE appender_initialize(VALUE self, VALUE con, VALUE schema, VALUE tabl
153
156
  return self;
154
157
  }
155
158
 
159
+ static VALUE appender__initialize_ext(VALUE self, VALUE con, VALUE catalog, VALUE schema, VALUE table) {
160
+ rubyDuckDBConnection *ctxcon;
161
+ rubyDuckDBAppender *ctx;
162
+ char *pcatalog = 0;
163
+ char *pschema = 0;
164
+
165
+ if (!rb_obj_is_kind_of(con, cDuckDBConnection)) {
166
+ rb_raise(rb_eTypeError, "1st argument should be instance of DuckDB::Connection");
167
+ }
168
+
169
+ TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
170
+ ctxcon = rbduckdb_get_struct_connection(con);
171
+
172
+ if (catalog != Qnil) {
173
+ pcatalog = StringValuePtr(catalog);
174
+ }
175
+ if (schema != Qnil) {
176
+ pschema = StringValuePtr(schema);
177
+ }
178
+
179
+ if (duckdb_appender_create_ext(ctxcon->con, pcatalog, pschema, StringValuePtr(table), &(ctx->appender)) == DuckDBError) {
180
+ rb_raise(eDuckDBError, "failed to create appender");
181
+ }
182
+ return self;
183
+ }
184
+
156
185
  /* call-seq:
157
186
  * appender.error_message -> String
158
187
  *
@@ -425,6 +454,21 @@ static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper) {
425
454
  return state_to_rbool(duckdb_append_uhugeint(ctx->appender, uhugeint));
426
455
  }
427
456
 
457
+ /* :nodoc: */
458
+ static VALUE appender__append_uuid(VALUE self, VALUE val) {
459
+ rubyDuckDBAppender *ctx;
460
+ duckdb_uhugeint uhugeint;
461
+ duckdb_value uuid_val;
462
+ duckdb_state state;
463
+
464
+ TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
465
+ rbduckdb_uuid_str_to_uhugeint(val, &uhugeint);
466
+ uuid_val = duckdb_create_uuid(uhugeint);
467
+ state = duckdb_append_value(ctx->appender, uuid_val);
468
+ duckdb_destroy_value(&uuid_val);
469
+ return state_to_rbool(state);
470
+ }
471
+
428
472
  /* :nodoc: */
429
473
  static VALUE appender__append_value(VALUE self, VALUE val) {
430
474
  rubyDuckDBAppender *ctx;
@@ -447,6 +491,17 @@ static VALUE appender__append_data_chunk(VALUE self, VALUE chunk) {
447
491
  return state_to_rbool(duckdb_append_data_chunk(ctx->appender, chunk_ctx->data_chunk));
448
492
  }
449
493
 
494
+ /* :nodoc: */
495
+ static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row) {
496
+ rubyDuckDBAppender *ctx;
497
+ rubyDuckDBDataChunk *chunk_ctx;
498
+
499
+ TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
500
+ chunk_ctx = rbduckdb_get_struct_data_chunk(chunk);
501
+
502
+ return state_to_rbool(duckdb_append_default_to_chunk(ctx->appender, chunk_ctx->data_chunk, NUM2ULL(col), NUM2ULL(row)));
503
+ }
504
+
450
505
  /* :nodoc: */
451
506
  static VALUE appender__flush(VALUE self) {
452
507
  rubyDuckDBAppender *ctx;
@@ -504,7 +559,8 @@ void rbduckdb_init_appender(void) {
504
559
  cDuckDBAppender = rb_define_class_under(mDuckDB, "Appender", rb_cObject);
505
560
  rb_define_alloc_func(cDuckDBAppender, allocate);
506
561
  rb_define_singleton_method(cDuckDBAppender, "create_query", appender_s_create_query, 5);
507
- rb_define_method(cDuckDBAppender, "initialize", appender_initialize, 3);
562
+ rb_define_private_method(cDuckDBAppender, "_initialize", appender__initialize, 3);
563
+ rb_define_private_method(cDuckDBAppender, "_initialize_ext", appender__initialize_ext, 4);
508
564
  rb_define_method(cDuckDBAppender, "error_message", appender_error_message, 0);
509
565
  rb_define_private_method(cDuckDBAppender, "_end_row", appender__end_row, 0);
510
566
  rb_define_private_method(cDuckDBAppender, "_flush", appender__flush, 0);
@@ -538,6 +594,8 @@ void rbduckdb_init_appender(void) {
538
594
  rb_define_private_method(cDuckDBAppender, "_append_timestamp", appender__append_timestamp, 7);
539
595
  rb_define_private_method(cDuckDBAppender, "_append_hugeint", appender__append_hugeint, 2);
540
596
  rb_define_private_method(cDuckDBAppender, "_append_uhugeint", appender__append_uhugeint, 2);
597
+ rb_define_private_method(cDuckDBAppender, "_append_uuid", appender__append_uuid, 1);
541
598
  rb_define_private_method(cDuckDBAppender, "_append_value", appender__append_value, 1);
542
599
  rb_define_private_method(cDuckDBAppender, "_append_data_chunk", appender__append_data_chunk, 1);
600
+ rb_define_private_method(cDuckDBAppender, "_append_default_to_chunk", appender__append_default_to_chunk, 3);
543
601
  }
@@ -0,0 +1,226 @@
1
+ #include "ruby-duckdb.h"
2
+ #include <errno.h>
3
+
4
+ static VALUE cDuckDBArrowArrayStream;
5
+
6
+ typedef struct {
7
+ struct ArrowArrayStream stream;
8
+ } rubyDuckDBArrowArrayStream;
9
+
10
+ /*
11
+ * Heap-allocated context referenced by stream.private_data. Consumers may
12
+ * move the stream struct contents out and keep using the callbacks after
13
+ * the Ruby DuckDB::ArrowArrayStream object is gone, so this context is
14
+ * freed only by the stream release callback, and it holds a reference on
15
+ * the result struct (rbduckdb_result_ref) until then. The release callback
16
+ * must not call any Ruby API: it can run during GC sweep (via deallocate
17
+ * of an unconsumed stream) or from a non-Ruby thread.
18
+ */
19
+ typedef struct {
20
+ rubyDuckDBResult *presult_ctx;
21
+ duckdb_arrow_options arrow_options;
22
+ char *last_error;
23
+ } arrowArrayStreamContext;
24
+
25
+ static void deallocate(void *ctx);
26
+ static VALUE allocate(VALUE klass);
27
+ static size_t memsize(const void *p);
28
+ static VALUE arrow_array_stream_to_i(VALUE self);
29
+ static VALUE arrow_array_stream_arrow_c_stream(VALUE self);
30
+ static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg);
31
+ static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data);
32
+ static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
33
+ static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out);
34
+ static const char *stream_get_last_error(struct ArrowArrayStream *stream);
35
+ static void stream_release(struct ArrowArrayStream *stream);
36
+
37
+ static const rb_data_type_t arrow_array_stream_data_type = {
38
+ "DuckDB/ArrowArrayStream",
39
+ {NULL, deallocate, memsize,},
40
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
41
+ };
42
+
43
+ static void deallocate(void *ctx) {
44
+ rubyDuckDBArrowArrayStream *p = (rubyDuckDBArrowArrayStream *)ctx;
45
+
46
+ if (p->stream.release != NULL) {
47
+ p->stream.release(&(p->stream));
48
+ }
49
+ xfree(p);
50
+ }
51
+
52
+ static VALUE allocate(VALUE klass) {
53
+ rubyDuckDBArrowArrayStream *ctx = xcalloc((size_t)1, sizeof(rubyDuckDBArrowArrayStream));
54
+ return TypedData_Wrap_Struct(klass, &arrow_array_stream_data_type, ctx);
55
+ }
56
+
57
+ static size_t memsize(const void *p) {
58
+ return sizeof(rubyDuckDBArrowArrayStream);
59
+ }
60
+
61
+ /* Context memory is managed with plain malloc/free because the release
62
+ * callback may run outside Ruby's memory bookkeeping. */
63
+ static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg) {
64
+ size_t len = strlen(msg) + 1;
65
+
66
+ free(ctx->last_error);
67
+ ctx->last_error = malloc(len);
68
+ if (ctx->last_error != NULL) {
69
+ memcpy(ctx->last_error, msg, len);
70
+ }
71
+ }
72
+
73
+ static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data) {
74
+ if (error_data == NULL) {
75
+ return 0;
76
+ }
77
+ if (!duckdb_error_data_has_error(error_data)) {
78
+ duckdb_destroy_error_data(&error_data);
79
+ return 0;
80
+ }
81
+ stream_set_error(ctx, duckdb_error_data_message(error_data));
82
+ duckdb_destroy_error_data(&error_data);
83
+ return EIO;
84
+ }
85
+
86
+ static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
87
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
88
+ duckdb_error_data error_data;
89
+ duckdb_logical_type *types;
90
+ const char **names;
91
+ idx_t column_count;
92
+ idx_t i;
93
+
94
+ column_count = duckdb_column_count(&(ctx->presult_ctx->result));
95
+ types = calloc((size_t)column_count, sizeof(duckdb_logical_type));
96
+ names = calloc((size_t)column_count, sizeof(const char *));
97
+ if (column_count > 0 && (types == NULL || names == NULL)) {
98
+ free(types);
99
+ free(names);
100
+ stream_set_error(ctx, "failed to allocate memory for Arrow schema conversion");
101
+ return ENOMEM;
102
+ }
103
+ for (i = 0; i < column_count; i++) {
104
+ types[i] = duckdb_column_logical_type(&(ctx->presult_ctx->result), i);
105
+ names[i] = duckdb_column_name(&(ctx->presult_ctx->result), i);
106
+ }
107
+
108
+ error_data = duckdb_to_arrow_schema(ctx->arrow_options, types, names, column_count, out);
109
+
110
+ for (i = 0; i < column_count; i++) {
111
+ duckdb_destroy_logical_type(&types[i]);
112
+ }
113
+ free(types);
114
+ free(names);
115
+ return stream_check_error(ctx, error_data);
116
+ }
117
+
118
+ static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out) {
119
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
120
+ duckdb_data_chunk chunk;
121
+ duckdb_error_data error_data;
122
+
123
+ chunk = duckdb_fetch_chunk(ctx->presult_ctx->result);
124
+ if (chunk == NULL) {
125
+ /* End of stream: a released (release == NULL) array. */
126
+ memset(out, 0, sizeof(struct ArrowArray));
127
+ return 0;
128
+ }
129
+ /* duckdb_data_chunk_to_arrow copies the chunk into Arrow-owned buffers,
130
+ * so the chunk can be destroyed right after conversion. */
131
+ error_data = duckdb_data_chunk_to_arrow(ctx->arrow_options, chunk, out);
132
+ duckdb_destroy_data_chunk(&chunk);
133
+ return stream_check_error(ctx, error_data);
134
+ }
135
+
136
+ static const char *stream_get_last_error(struct ArrowArrayStream *stream) {
137
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
138
+
139
+ return ctx == NULL ? NULL : ctx->last_error;
140
+ }
141
+
142
+ static void stream_release(struct ArrowArrayStream *stream) {
143
+ arrowArrayStreamContext *ctx;
144
+
145
+ if (stream == NULL || stream->release == NULL) {
146
+ return;
147
+ }
148
+ ctx = (arrowArrayStreamContext *)stream->private_data;
149
+ if (ctx != NULL) {
150
+ rbduckdb_result_unref(ctx->presult_ctx);
151
+ if (ctx->arrow_options != NULL) {
152
+ duckdb_destroy_arrow_options(&(ctx->arrow_options));
153
+ }
154
+ free(ctx->last_error);
155
+ free(ctx);
156
+ }
157
+ stream->private_data = NULL;
158
+ stream->release = NULL;
159
+ }
160
+
161
+ VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult) {
162
+ VALUE obj;
163
+ rubyDuckDBArrowArrayStream *p;
164
+ rubyDuckDBResult *presult_ctx;
165
+ arrowArrayStreamContext *ctx;
166
+
167
+ obj = allocate(cDuckDBArrowArrayStream);
168
+ TypedData_Get_Struct(obj, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, p);
169
+ presult_ctx = rbduckdb_get_struct_result(oDuckDBResult);
170
+
171
+ ctx = calloc((size_t)1, sizeof(arrowArrayStreamContext));
172
+ if (ctx == NULL) {
173
+ rb_raise(rb_eNoMemError, "failed to allocate ArrowArrayStream context");
174
+ }
175
+
176
+ rbduckdb_result_ref(presult_ctx);
177
+ ctx->presult_ctx = presult_ctx;
178
+ ctx->arrow_options = duckdb_result_get_arrow_options(&(presult_ctx->result));
179
+
180
+ p->stream.get_schema = stream_get_schema;
181
+ p->stream.get_next = stream_get_next;
182
+ p->stream.get_last_error = stream_get_last_error;
183
+ p->stream.release = stream_release;
184
+ p->stream.private_data = ctx;
185
+
186
+ return obj;
187
+ }
188
+
189
+ /*
190
+ * call-seq:
191
+ * stream.to_i -> Integer
192
+ *
193
+ * Returns the address of the underlying C <code>struct ArrowArrayStream</code>.
194
+ * Arrow consumers such as red-arrow accept this address directly:
195
+ *
196
+ * reader = Arrow::RecordBatchReader.import(stream.to_i)
197
+ */
198
+ static VALUE arrow_array_stream_to_i(VALUE self) {
199
+ rubyDuckDBArrowArrayStream *p;
200
+
201
+ TypedData_Get_Struct(self, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, p);
202
+ return ULL2NUM((unsigned long long)(uintptr_t)&(p->stream));
203
+ }
204
+
205
+ /*
206
+ * call-seq:
207
+ * stream.arrow_c_stream -> self
208
+ *
209
+ * Returns self. Defined so that the stream object itself satisfies the
210
+ * Arrow C stream protocol used by ruby-polars and others.
211
+ */
212
+ static VALUE arrow_array_stream_arrow_c_stream(VALUE self) {
213
+ return self;
214
+ }
215
+
216
+ void rbduckdb_init_arrow_array_stream(void) {
217
+ #if 0
218
+ VALUE mDuckDB = rb_define_module("DuckDB");
219
+ #endif
220
+ cDuckDBArrowArrayStream = rb_define_class_under(mDuckDB, "ArrowArrayStream", rb_cObject);
221
+
222
+ rb_define_alloc_func(cDuckDBArrowArrayStream, allocate);
223
+
224
+ rb_define_method(cDuckDBArrowArrayStream, "to_i", arrow_array_stream_to_i, 0);
225
+ rb_define_method(cDuckDBArrowArrayStream, "arrow_c_stream", arrow_array_stream_arrow_c_stream, 0);
226
+ }
@@ -0,0 +1,61 @@
1
+ #ifndef RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
2
+ #define RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
3
+
4
+ /*
5
+ * Canonical Arrow C Data Interface and Arrow C Stream Interface definitions.
6
+ * https://arrow.apache.org/docs/format/CDataInterface.html
7
+ * duckdb.h only forward-declares these structs.
8
+ */
9
+ #ifndef ARROW_C_DATA_INTERFACE
10
+ #define ARROW_C_DATA_INTERFACE
11
+
12
+ #define ARROW_FLAG_DICTIONARY_ORDERED 1
13
+ #define ARROW_FLAG_NULLABLE 2
14
+ #define ARROW_FLAG_MAP_KEYS_SORTED 4
15
+
16
/*
 * Arrow C Data Interface schema description. Member order and types are part
 * of the Arrow ABI and must not be changed.
 */
struct ArrowSchema {
    /* Type description */
    const char *format;
    const char *name;
    const char *metadata;
    int64_t flags; /* bitfield of the ARROW_FLAG_* constants */
    int64_t n_children;
    struct ArrowSchema **children;
    struct ArrowSchema *dictionary;

    /* Release callback and producer-private state */
    void (*release)(struct ArrowSchema *schema);
    void *private_data;
};
28
+
29
/*
 * Arrow C Data Interface array. Member order and types are part of the Arrow
 * ABI and must not be changed.
 */
struct ArrowArray {
    /* Array data description */
    int64_t length;
    int64_t null_count;
    int64_t offset;
    int64_t n_buffers;
    int64_t n_children;
    const void **buffers;
    struct ArrowArray **children;
    struct ArrowArray *dictionary;

    /* Release callback and producer-private state; release == NULL marks a
     * released array. */
    void (*release)(struct ArrowArray *array);
    void *private_data;
};
42
+
43
+ #endif /* ARROW_C_DATA_INTERFACE */
44
+
45
+ #ifndef ARROW_C_STREAM_INTERFACE
46
+ #define ARROW_C_STREAM_INTERFACE
47
+
48
/*
 * Arrow C Stream Interface. Member order and types are part of the Arrow ABI
 * and must not be changed.
 */
struct ArrowArrayStream {
    /* Callbacks; get_schema and get_next return an int status code. */
    int (*get_schema)(struct ArrowArrayStream *stream, struct ArrowSchema *out);
    int (*get_next)(struct ArrowArrayStream *stream, struct ArrowArray *out);
    const char *(*get_last_error)(struct ArrowArrayStream *stream);

    /* Release callback and producer-private state */
    void (*release)(struct ArrowArrayStream *stream);
    void *private_data;
};
55
+
56
+ #endif /* ARROW_C_STREAM_INTERFACE */
57
+
58
+ void rbduckdb_init_arrow_array_stream(void);
59
+ VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult);
60
+
61
+ #endif