RubyGems - duckdb - Versions diffs - 1.5.2.1 → 1.5.4.0 - Mend

duckdb 1.5.2.1 → 1.5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +40 -0
data/README.md +52 -0
data/ext/duckdb/aggregate_function.c +1 -1
data/ext/duckdb/aggregate_function_set.c +86 -0
data/ext/duckdb/aggregate_function_set.h +14 -0
data/ext/duckdb/appender.c +62 -4
data/ext/duckdb/arrow_array_stream.c +226 -0
data/ext/duckdb/arrow_array_stream.h +61 -0
data/ext/duckdb/arrow_import.c +165 -0
data/ext/duckdb/arrow_import.h +6 -0
data/ext/duckdb/blob.c +1 -1
data/ext/duckdb/blob.h +1 -2
data/ext/duckdb/config.c +1 -1
data/ext/duckdb/config.h +1 -1
data/ext/duckdb/connection.c +26 -3
data/ext/duckdb/converter.h +1 -0
data/ext/duckdb/conveter.c +39 -9
data/ext/duckdb/data_chunk.c +10 -0
data/ext/duckdb/data_chunk.h +1 -0
data/ext/duckdb/duckdb.c +14 -11
data/ext/duckdb/error.c +1 -1
data/ext/duckdb/error.h +1 -3
data/ext/duckdb/extconf.rb +28 -13
data/ext/duckdb/function_executor.c +308 -2
data/ext/duckdb/function_executor.h +44 -0
data/ext/duckdb/prepared_statement.c +38 -0
data/ext/duckdb/result.c +49 -53
data/ext/duckdb/result.h +11 -0
data/ext/duckdb/ruby-duckdb.h +4 -0
data/ext/duckdb/scalar_function.c +97 -29
data/ext/duckdb/scalar_function.h +2 -4
data/ext/duckdb/scalar_function_bind_info.c +13 -13
data/ext/duckdb/scalar_function_bind_info.h +1 -1
data/ext/duckdb/scalar_function_set.c +9 -9
data/ext/duckdb/scalar_function_set.h +2 -2
data/ext/duckdb/table_description.c +19 -19
data/ext/duckdb/table_description.h +1 -1
data/ext/duckdb/table_function.c +94 -28
data/ext/duckdb/table_function.h +2 -2
data/ext/duckdb/table_function_bind_info.c +20 -20
data/ext/duckdb/table_function_bind_info.h +2 -2
data/ext/duckdb/table_function_function_info.c +5 -5
data/ext/duckdb/table_function_function_info.h +2 -2
data/ext/duckdb/table_function_init_info.c +70 -5
data/ext/duckdb/table_function_init_info.h +2 -2
data/lib/duckdb/aggregate_function.rb +7 -1
data/lib/duckdb/aggregate_function_set.rb +29 -0
data/lib/duckdb/appender.rb +97 -0
data/lib/duckdb/arrow_array_stream.rb +33 -0
data/lib/duckdb/connection.rb +139 -9
data/lib/duckdb/prepared_statement.rb +35 -0
data/lib/duckdb/result.rb +39 -2
data/lib/duckdb/scalar_function.rb +9 -4
data/lib/duckdb/scalar_function_set.rb +0 -1
data/lib/duckdb/table_description.rb +7 -0
data/lib/duckdb/table_name_parser.rb +58 -0
data/lib/duckdb/version.rb +1 -1
data/lib/duckdb.rb +3 -0
metadata +11 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4c7c57e0282ec2213b1efb3844445804e5fbbc09e03496f2ba71c1bd24152843
-  data.tar.gz: 9057fa45fafeba0dc2110562f98cd49eecf53ec0f0a48a989756742525c82b4a
+  metadata.gz: c9c37c59f2e64ac10b72765250ec674ef40e82d83a7499a6c9e1117ee8e9e9d0
+  data.tar.gz: e9510873a0de444b35bb7c3de2d26f8d9a9ebcd01466a95383fa16d38d277e97
 SHA512:
-  metadata.gz: 78bbd166f587491085267791ffca949f2b310597f802424b7e0525c8c4f35f345c2362b17b33401bee374601d5f52746d10ac21f60ea8344bd3bf08e28d7f4b5
-  data.tar.gz: 6fc88b5e289ef3451691f57cb404c390b9deabbafddf81e8aedd300121276a9df62b0492559f2602eb4aeaa57ed2a83cc98ccfbef3a728771c34a565a022f78a
+  metadata.gz: 2ab204dc5d6034b846d3267ca78fd4d47f75f000e3d6c97e5673ca9791618c84e7130230cd1ad7634858ff25fd5cc5952bac500263ef228225767a730684749c
+  data.tar.gz: f55e20a3dc6a97aea0606dca4b047746da44941c4f84145a48c45248d150fd58ef800ca617adc7921ae6418ac0f8275bf29e24405f3cdbf2e917377029b8641f

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,46 @@ All notable changes to this project will be documented in this file.
 # Unreleased
+# 1.5.4.0 - 2026-06-20
+- bump up DuckDB 1.5.4 and 1.4.5 on CI.
+- add experimental `DuckDB::Result#arrow_c_stream` returning `DuckDB::ArrowArrayStream` to export a query result as an Arrow C stream (Arrow C Data Interface). The stream can be consumed directly by ruby-polars (`Polars::DataFrame.new(result)`) and red-arrow (`Arrow::RecordBatchReader.import(stream.to_i)`).
+- add experimental `DuckDB::Connection#append_arrow(table, producer)` to import an Arrow producer (any object responding to `#arrow_c_stream`, such as a Polars `DataFrame` or a `DuckDB::Result`) into an existing table, returning the number of rows appended.
+- drop Ruby 3.2.
+- add `DuckDB::TableFunction::InitInfo#max_threads=` (and `#set_max_threads`) to hint DuckDB how many worker threads can execute a custom table function concurrently.
+- add `DuckDB::TableFunction::InitInfo#column_count` to get the number of projected result columns of a custom table function scan.
+- add `DuckDB::TableFunction::InitInfo#column_index` to get the source column index of a given projected result column of a custom table function scan.
+- add `DuckDB::Appender#append_uuid(value)` to append a UUID value to a `UUID` column. `value` must be a String in canonical UUID format (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`).
+- add `DuckDB::PreparedStatement#bind_uuid(index, value)` to bind a UUID parameter. `value` must be a String in canonical UUID format.
+# 1.5.3.0 - 2026-05-24
+- bump up DuckDB 1.5.3 on CI.
+- add `DuckDB::AggregateFunctionSet` class and `DuckDB::Connection#register_aggregate_function_set` to register multiple overloads of a custom aggregate function under one SQL name.
+- add `DuckDB::Appender#append_default_to_chunk`.
+- add `DuckDB::TableNameParser` module with shared table name parsing logic (quoting and dot-notation), included by `DuckDB::Appender` and `DuckDB::TableDescription`.
+- add `DuckDB::PreparedStatement#bind_timestamp_tz` to bind a `TIMESTAMP WITH TIME ZONE` (TIMESTAMPTZ) parameter from a `Time` or timestamp string.
+- `DuckDB::AggregateFunction#name=` and `DuckDB::ScalarFunction#name=` now accept Symbol arguments (coerced to String).
+## Breaking Changes
+- `DuckDB::ScalarFunction.create`: `name:` is now a required keyword argument (previously optional with `nil` default). Parameter order changed to `name:, return_type:, ...`.
+- `DuckDB::ScalarFunctionSet#add`: no longer overrides the scalar function's name with the set's name. The individual function must have its own name set before being added to the set.
+- `DuckDB::TableDescription.new`: the 2nd argument now parses dot-notation and quoting:
+  - `'schema.table'` is interpreted as schema-qualified (deprecated; use `schema:` keyword instead).
+  - `'"schema.table"'` or `"'schema.table'"` — quotes are stripped and the name is treated as a literal table name containing a dot.
+- `DuckDB::Appender.new`: the 2nd argument now parses dot-notation and quoting (previously only `Connection#appender` did this):
+  - `'schema.table'` is interpreted as schema-qualified (deprecated; use `schema:` keyword instead).
+  - `'"schema.table"'` or `"'schema.table'"` — quotes are stripped and the name is treated as a literal table name containing a dot.
+- `DuckDB::Connection#appender`: table name parsing (dot-notation, quoting) is now delegated to `DuckDB::Appender.new` internally. Behavior is unchanged — `'schema.table'` has always split on dot in `Connection#appender`. No deprecation warning is emitted.
+- `DuckDB::Connection#appender`: table names surrounded by double or single quotes (e.g. `'"a.b"'` or `"'a.b'"`) are treated as literal table names — the quotes are stripped and no dot-splitting is performed.
+- fix: `Connection#appender('a.b')` no longer emits a misleading deprecation warning (the behavior was not deprecated — it was already the correct behavior).
+- add `DuckDB::Appender.new(con, table, schema: nil, catalog: nil)` keyword argument form.
+- add `DuckDB::Connection#appender(table, schema: nil, catalog: nil)` keyword argument form.
+- deprecate `DuckDB::Result#_column_type(i)` private method. use `columns[i].send(:_type)` instead.
+- `DuckDB::Result#enum_dictionary_values` checks invalid column index.
+- deprecate `DuckDB::Result#_enum_dictionary_size` private method.
+- deprecate `DuckDB::Result#_enum_dictionary_value` private method.
+## Deprecations
+- deprecate `DuckDB::Appender.new(con, schema, table)` 3-positional-argument form. Use `DuckDB::Appender.new(con, table, schema: schema)` instead.
+- deprecate passing dot-notation string (e.g. `'schema.table'`) directly to `DuckDB::Appender.new`. Use `DuckDB::Appender.new(con, table, schema: schema)` instead.
 # 1.5.2.1 - 2026-04-24
 - add `DuckDB::AggregateFunction.create`.

data/README.md CHANGED Viewed

@@ -266,6 +266,58 @@ res.first.first # => 4
 Set `DuckDB.default_timezone` to control how TIMESTAMP and TIME values without time zone are converted to Ruby `Time` objects. The default is `:local`, but you can use `:utc` for UTC conversion.
+### Arrow interop (experimental)
+`DuckDB::Result#arrow_c_stream` exports a query result as an Arrow C stream
+([Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)).
+The returned `DuckDB::ArrowArrayStream` satisfies the Ruby Arrow C stream
+protocol (`#arrow_c_stream` / `#to_i`), so query results can be handed to
+Arrow consumers in columnar form, without converting each row to Ruby objects.
+With [ruby-polars](https://github.com/ankane/ruby-polars), no glue code is needed:
+```ruby
+result = con.query('SELECT * FROM users')
+df = Polars::DataFrame.new(result)
+```
+With [red-arrow](https://github.com/apache/arrow/tree/main/ruby/red-arrow),
+pass the stream address to `Arrow::RecordBatchReader.import`:
+```ruby
+result = con.query('SELECT * FROM users')
+reader = Arrow::RecordBatchReader.import(result.arrow_c_stream.to_i)
+```
+The consumer takes ownership of the stream's contents, so a result can be
+exported only once; exporting the same result again raises `DuckDB::Error`.
+In the other direction, `DuckDB::Connection#append_arrow` imports an Arrow
+producer into an existing table. Any object responding to `#arrow_c_stream`
+works as the producer — for example a Polars `DataFrame`, or another
+`DuckDB::Result`:
+```ruby
+con.query('CREATE TABLE users (id BIGINT, name VARCHAR)')
+rows = con.append_arrow('users', polars_df) # => number of rows appended
+con.query('SELECT * FROM users').to_a
+```
+The producer's columns must line up with the table's columns by count and
+position. DuckDB casts compatible column types (e.g. INTEGER into a BIGINT
+column); a type that cannot be cast raises `DuckDB::Error`. `append_arrow` is
+not transactional — wrap it in your own transaction if you need all-or-nothing.
+These features are **experimental**: they are built on DuckDB's unstable Arrow
+C API and may change in any minor release.
+Note: [red-arrow-format](https://github.com/apache/arrow/tree/main/ruby/red-arrow-format)
+(the pure-Ruby Arrow implementation) supports only the Arrow IPC
+serialization format, not the C Data Interface. To exchange data with it,
+write/read Arrow IPC files through DuckDB's
+[arrow community extension](https://duckdb.org/community_extensions/extensions/arrow.html)
+(`COPY ... TO 'data.arrows'` / `read_arrow(...)`).
 ## Versioning and DuckDB support
 The first three digits of the gem version track the DuckDB release that the

data/ext/duckdb/aggregate_function.c CHANGED Viewed

@@ -736,7 +736,7 @@ void rbduckdb_init_aggregate_function(void) {
     cDuckDBAggregateFunction = rb_define_class_under(mDuckDB, "AggregateFunction", rb_cObject);
     rb_define_alloc_func(cDuckDBAggregateFunction, allocate);
     rb_define_method(cDuckDBAggregateFunction, "initialize", aggregate_function_initialize, 0);
-    rb_define_method(cDuckDBAggregateFunction, "name=", aggregate_function_set_name, 1);
+    rb_define_method(cDuckDBAggregateFunction, "set_name", aggregate_function_set_name, 1);
     rb_define_private_method(cDuckDBAggregateFunction, "_set_return_type", aggregate_function__set_return_type, 1);
     rb_define_private_method(cDuckDBAggregateFunction, "_add_parameter", aggregate_function__add_parameter, 1);
     rb_define_private_method(cDuckDBAggregateFunction, "_set_init", aggregate_function__set_init, 0);

data/ext/duckdb/aggregate_function_set.c ADDED Viewed

@@ -0,0 +1,86 @@
+#include "ruby-duckdb.h"
+VALUE cDuckDBAggregateFunctionSet;
+static void mark(void *);
+static void deallocate(void *);
+static VALUE allocate(VALUE klass);
+static size_t memsize(const void *p);
+static void compact(void *);
+static VALUE aggregate_function_set__initialize(VALUE self, VALUE name);
+static VALUE aggregate_function_set__add(VALUE self, VALUE aggregate_function);
+/* TypedData descriptor for DuckDB::AggregateFunctionSet: wires up the mark,
+ * free, size and compaction callbacks defined in this file. */
+static const rb_data_type_t aggregate_function_set_data_type = {
+    "DuckDB/AggregateFunctionSet",
+    {mark, deallocate, memsize, compact},
+    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+};
+/* GC mark callback: keeps the array of added AggregateFunction objects alive.
+ * The reference is marked as movable because compact() refreshes it with
+ * rb_gc_location() after compaction; a plain rb_gc_mark() would pin the array
+ * and leave that callback without any effect. */
+static void mark(void *ctx) {
+    rubyDuckDBAggregateFunctionSet *p = (rubyDuckDBAggregateFunctionSet *)ctx;
+    rb_gc_mark_movable(p->functions);
+}
+/* GC free callback: destroys the DuckDB aggregate function set handle and
+ * then releases the wrapper struct itself. */
+static void deallocate(void *ctx) {
+    rubyDuckDBAggregateFunctionSet *set_ctx = ctx;
+    duckdb_destroy_aggregate_function_set(&set_ctx->aggregate_function_set);
+    xfree(set_ctx);
+}
+/* GC compaction callback: re-reads the current location of the functions
+ * array and stores it back into the wrapper struct. */
+static void compact(void *ctx) {
+    rubyDuckDBAggregateFunctionSet *set_ctx = ctx;
+    VALUE relocated = rb_gc_location(set_ctx->functions);
+    set_ctx->functions = relocated;
+}
+/* Allocator: wraps a zero-filled struct in a typed-data object first and only
+ * afterwards creates the Ruby array that tracks added functions. */
+static VALUE allocate(VALUE klass) {
+    rubyDuckDBAggregateFunctionSet *set_ctx;
+    VALUE wrapper;
+    set_ctx = xcalloc((size_t)1, sizeof(rubyDuckDBAggregateFunctionSet));
+    wrapper = TypedData_Wrap_Struct(klass, &aggregate_function_set_data_type, set_ctx);
+    set_ctx->functions = rb_ary_new();
+    RB_GC_GUARD(set_ctx->functions);
+    return wrapper;
+}
+/* Size callback: reports the fixed size of the wrapper struct. */
+static size_t memsize(const void *p) {
+    (void)p; /* the reported size does not depend on the instance */
+    return sizeof(rubyDuckDBAggregateFunctionSet);
+}
+/* Returns the wrapper struct stored in obj after a typed-data type check
+ * against aggregate_function_set_data_type. */
+rubyDuckDBAggregateFunctionSet *rbduckdb_get_struct_aggregate_function_set(VALUE obj) {
+    return (rubyDuckDBAggregateFunctionSet *)rb_check_typeddata(obj, &aggregate_function_set_data_type);
+}
+/* :nodoc: */
+static VALUE aggregate_function_set__initialize(VALUE self, VALUE name) {
+    /* Backs the private Ruby method _initialize(name): creates the DuckDB
+     * aggregate function set for this wrapper and returns self. */
+    rubyDuckDBAggregateFunctionSet *p;
+    const char *cname;
+    TypedData_Get_Struct(self, rubyDuckDBAggregateFunctionSet, &aggregate_function_set_data_type, p);
+    /* Convert first: StringValueCStr raises for a non-String or a name with an
+     * embedded NUL, and in that case the current handle must stay untouched. */
+    cname = StringValueCStr(name);
+    /* A repeated _initialize call would otherwise overwrite, and thereby leak,
+     * the handle created by the previous call. */
+    if (p->aggregate_function_set != NULL) {
+        duckdb_destroy_aggregate_function_set(&(p->aggregate_function_set));
+    }
+    p->aggregate_function_set = duckdb_create_aggregate_function_set(cname);
+    return self;
+}
+/* :nodoc: */
+static VALUE aggregate_function_set__add(VALUE self, VALUE aggregate_function) {
+    /* Backs the private Ruby method _add(aggregate_function): registers the
+     * function with the DuckDB set, raises DuckDB::Error if DuckDB rejects it,
+     * and otherwise remembers the Ruby object in the functions array. */
+    rubyDuckDBAggregateFunctionSet *set_ctx;
+    rubyDuckDBAggregateFunction *func_ctx;
+    duckdb_state state;
+    TypedData_Get_Struct(self, rubyDuckDBAggregateFunctionSet, &aggregate_function_set_data_type, set_ctx);
+    func_ctx = rbduckdb_get_struct_aggregate_function(aggregate_function);
+    state = duckdb_add_aggregate_function_to_set(set_ctx->aggregate_function_set, func_ctx->aggregate_function);
+    if (state == DuckDBError) {
+        rb_raise(eDuckDBError, "failed to add aggregate function to set (duplicate overload?)");
+    }
+    rb_ary_push(set_ctx->functions, aggregate_function);
+    return self;
+}
+/* Defines the AggregateFunctionSet class under mDuckDB, installs its allocator
+ * and registers the private _initialize and _add methods. */
+void rbduckdb_init_aggregate_function_set(void) {
+    /* NOTE(review): the #if 0 block below is never compiled; presumably it
+     * exists so documentation tooling can resolve mDuckDB — confirm. */
+#if 0
+    VALUE mDuckDB = rb_define_module("DuckDB");
+#endif
+    cDuckDBAggregateFunctionSet = rb_define_class_under(mDuckDB, "AggregateFunctionSet", rb_cObject);
+    rb_define_alloc_func(cDuckDBAggregateFunctionSet, allocate);
+    rb_define_private_method(cDuckDBAggregateFunctionSet, "_initialize", aggregate_function_set__initialize, 1);
+    rb_define_private_method(cDuckDBAggregateFunctionSet, "_add", aggregate_function_set__add, 1);
+}

data/ext/duckdb/aggregate_function_set.h ADDED Viewed

@@ -0,0 +1,14 @@
+#ifndef RUBY_DUCKDB_AGGREGATE_FUNCTION_SET_H
+#define RUBY_DUCKDB_AGGREGATE_FUNCTION_SET_H
+/* Native state stored behind a DuckDB::AggregateFunctionSet Ruby object. */
+struct _rubyDuckDBAggregateFunctionSet {
+    /* DuckDB handle for the function set. */
+    duckdb_aggregate_function_set aggregate_function_set;
+    VALUE functions; /* Ruby Array of AggregateFunction objects — prevents GC collection */
+};
+typedef struct _rubyDuckDBAggregateFunctionSet rubyDuckDBAggregateFunctionSet;
+void rbduckdb_init_aggregate_function_set(void);
+rubyDuckDBAggregateFunctionSet *rbduckdb_get_struct_aggregate_function_set(VALUE obj);
+#endif

data/ext/duckdb/appender.c CHANGED Viewed

@@ -9,7 +9,8 @@ static size_t memsize(const void *p);
 static VALUE appender_s_create_query(VALUE klass, VALUE con, VALUE query, VALUE types, VALUE table, VALUE columns);
-static VALUE appender_initialize(VALUE klass, VALUE con, VALUE schema, VALUE table);
+static VALUE appender__initialize(VALUE self, VALUE con, VALUE schema, VALUE table);
+static VALUE appender__initialize_ext(VALUE self, VALUE con, VALUE catalog, VALUE schema, VALUE table);
 static VALUE appender_error_message(VALUE self);
 static VALUE appender__append_bool(VALUE self, VALUE val);
 static VALUE appender__append_int8(VALUE self, VALUE val);
@@ -34,8 +35,10 @@ static VALUE appender__append_time(VALUE self, VALUE hour, VALUE min, VALUE sec,
 static VALUE appender__append_timestamp(VALUE self, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
 static VALUE appender__append_hugeint(VALUE self, VALUE lower, VALUE upper);
 static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper);
+static VALUE appender__append_uuid(VALUE self, VALUE val);
 static VALUE appender__append_value(VALUE self, VALUE val);
 static VALUE appender__append_data_chunk(VALUE self, VALUE chunk);
+static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row);
 static VALUE appender__flush(VALUE self);
 #ifdef HAVE_DUCKDB_H_GE_V1_5_0
@@ -130,14 +133,14 @@ static VALUE appender_s_create_query(VALUE klass, VALUE con, VALUE query, VALUE
     return appender;
 }
-static VALUE appender_initialize(VALUE self, VALUE con, VALUE schema, VALUE table) {
+static VALUE appender__initialize(VALUE self, VALUE con, VALUE schema, VALUE table) {
     rubyDuckDBConnection *ctxcon;
     rubyDuckDBAppender *ctx;
     char *pschema = 0;
     if (!rb_obj_is_kind_of(con, cDuckDBConnection)) {
-        rb_raise(rb_eTypeError, "1st argument should be instance of DackDB::Connection");
+        rb_raise(rb_eTypeError, "1st argument should be instance of DuckDB::Connection");
     }
     TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
@@ -153,6 +156,32 @@ static VALUE appender_initialize(VALUE self, VALUE con, VALUE schema, VALUE tabl
     return self;
 }
+static VALUE appender__initialize_ext(VALUE self, VALUE con, VALUE catalog, VALUE schema, VALUE table) {
+    /* Backs the private Ruby method _initialize_ext(con, catalog, schema, table).
+     * catalog and schema may be nil, in which case NULL is handed to DuckDB.
+     * Raises TypeError when con is not a DuckDB::Connection and DuckDB::Error
+     * when the appender cannot be created; returns self otherwise. */
+    rubyDuckDBConnection *ctxcon;
+    rubyDuckDBAppender *ctx;
+    const char *pcatalog = NULL;
+    const char *pschema = NULL;
+    const char *ptable;
+    if (!rb_obj_is_kind_of(con, cDuckDBConnection)) {
+        rb_raise(rb_eTypeError, "1st argument should be instance of DuckDB::Connection");
+    }
+    TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
+    ctxcon = rbduckdb_get_struct_connection(con);
+    /* StringValueCStr rather than StringValuePtr: DuckDB reads these as C
+     * strings, so the value must be NUL-terminated and a name containing an
+     * embedded NUL byte is rejected instead of being silently truncated. */
+    if (catalog != Qnil) {
+        pcatalog = StringValueCStr(catalog);
+    }
+    if (schema != Qnil) {
+        pschema = StringValueCStr(schema);
+    }
+    ptable = StringValueCStr(table);
+    if (duckdb_appender_create_ext(ctxcon->con, pcatalog, pschema, ptable, &(ctx->appender)) == DuckDBError) {
+        rb_raise(eDuckDBError, "failed to create appender");
+    }
+    return self;
+}
 /* call-seq:
  *   appender.error_message -> String
  *
@@ -425,6 +454,21 @@ static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper) {
     return state_to_rbool(duckdb_append_uhugeint(ctx->appender, uhugeint));
 }
+/* :nodoc: */
+static VALUE appender__append_uuid(VALUE self, VALUE val) {
+    /* Converts val with rbduckdb_uuid_str_to_uhugeint, wraps the result in a
+     * temporary DuckDB UUID value, appends it and reports the append state. */
+    rubyDuckDBAppender *appender_ctx;
+    duckdb_uhugeint uuid_bits;
+    duckdb_value boxed_uuid;
+    duckdb_state append_state;
+    TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, appender_ctx);
+    rbduckdb_uuid_str_to_uhugeint(val, &uuid_bits);
+    boxed_uuid = duckdb_create_uuid(uuid_bits);
+    append_state = duckdb_append_value(appender_ctx->appender, boxed_uuid);
+    /* The temporary value is destroyed whether or not the append succeeded. */
+    duckdb_destroy_value(&boxed_uuid);
+    return state_to_rbool(append_state);
+}
 /* :nodoc: */
 static VALUE appender__append_value(VALUE self, VALUE val) {
     rubyDuckDBAppender *ctx;
@@ -447,6 +491,17 @@ static VALUE appender__append_data_chunk(VALUE self, VALUE chunk) {
     return state_to_rbool(duckdb_append_data_chunk(ctx->appender, chunk_ctx->data_chunk));
 }
+/* :nodoc: */
+static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row) {
+    /* Forwards to duckdb_append_default_to_chunk for the given data chunk,
+     * column and row, and converts the resulting state with state_to_rbool. */
+    rubyDuckDBAppender *appender_ctx;
+    rubyDuckDBDataChunk *data_chunk_ctx;
+    duckdb_state append_state;
+    TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, appender_ctx);
+    data_chunk_ctx = rbduckdb_get_struct_data_chunk(chunk);
+    append_state = duckdb_append_default_to_chunk(appender_ctx->appender, data_chunk_ctx->data_chunk, NUM2ULL(col), NUM2ULL(row));
+    return state_to_rbool(append_state);
+}
 /* :nodoc: */
 static VALUE appender__flush(VALUE self) {
     rubyDuckDBAppender *ctx;
@@ -504,7 +559,8 @@ void rbduckdb_init_appender(void) {
     cDuckDBAppender = rb_define_class_under(mDuckDB, "Appender", rb_cObject);
     rb_define_alloc_func(cDuckDBAppender, allocate);
     rb_define_singleton_method(cDuckDBAppender, "create_query", appender_s_create_query, 5);
-    rb_define_method(cDuckDBAppender, "initialize", appender_initialize, 3);
+    rb_define_private_method(cDuckDBAppender, "_initialize", appender__initialize, 3);
+    rb_define_private_method(cDuckDBAppender, "_initialize_ext", appender__initialize_ext, 4);
     rb_define_method(cDuckDBAppender, "error_message", appender_error_message, 0);
     rb_define_private_method(cDuckDBAppender, "_end_row", appender__end_row, 0);
     rb_define_private_method(cDuckDBAppender, "_flush", appender__flush, 0);
@@ -538,6 +594,8 @@ void rbduckdb_init_appender(void) {
     rb_define_private_method(cDuckDBAppender, "_append_timestamp", appender__append_timestamp, 7);
     rb_define_private_method(cDuckDBAppender, "_append_hugeint", appender__append_hugeint, 2);
     rb_define_private_method(cDuckDBAppender, "_append_uhugeint", appender__append_uhugeint, 2);
+    rb_define_private_method(cDuckDBAppender, "_append_uuid", appender__append_uuid, 1);
     rb_define_private_method(cDuckDBAppender, "_append_value", appender__append_value, 1);
     rb_define_private_method(cDuckDBAppender, "_append_data_chunk", appender__append_data_chunk, 1);
+    rb_define_private_method(cDuckDBAppender, "_append_default_to_chunk", appender__append_default_to_chunk, 3);
 }

data/ext/duckdb/arrow_array_stream.c ADDED Viewed

@@ -0,0 +1,226 @@
+#include "ruby-duckdb.h"
+#include <errno.h>
+static VALUE cDuckDBArrowArrayStream;
+/* Native state stored behind a DuckDB::ArrowArrayStream Ruby object: the
+ * Arrow C stream struct itself, whose address is what #to_i returns. */
+typedef struct {
+    struct ArrowArrayStream stream;
+} rubyDuckDBArrowArrayStream;
+/*
+ * Heap-allocated context referenced by stream.private_data. Consumers may
+ * move the stream struct contents out and keep using the callbacks after
+ * the Ruby DuckDB::ArrowArrayStream object is gone, so this context is
+ * freed only by the stream release callback, and it holds a reference on
+ * the result struct (rbduckdb_result_ref) until then. The release callback
+ * must not call any Ruby API: it can run during GC sweep (via deallocate
+ * of an unconsumed stream) or from a non-Ruby thread.
+ */
+typedef struct {
+    /* Result being exported; referenced via rbduckdb_result_ref at creation
+     * and released via rbduckdb_result_unref in the release callback. */
+    rubyDuckDBResult *presult_ctx;
+    /* Options from duckdb_result_get_arrow_options; destroyed on release. */
+    duckdb_arrow_options arrow_options;
+    /* malloc'ed copy of the most recent error message, or NULL. */
+    char *last_error;
+} arrowArrayStreamContext;
+static void deallocate(void *ctx);
+static VALUE allocate(VALUE klass);
+static size_t memsize(const void *p);
+static VALUE arrow_array_stream_to_i(VALUE self);
+static VALUE arrow_array_stream_arrow_c_stream(VALUE self);
+static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg);
+static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data);
+static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
+static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out);
+static const char *stream_get_last_error(struct ArrowArrayStream *stream);
+static void stream_release(struct ArrowArrayStream *stream);
+/* TypedData descriptor for DuckDB::ArrowArrayStream. No mark callback is
+ * registered (first slot is NULL); only free and size callbacks are set. */
+static const rb_data_type_t arrow_array_stream_data_type = {
+    "DuckDB/ArrowArrayStream",
+    {NULL, deallocate, memsize,},
+    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+};
+/* GC free callback: a stream that still carries a release callback has not
+ * been released yet, so release it here before freeing the wrapper. */
+static void deallocate(void *ctx) {
+    rubyDuckDBArrowArrayStream *wrapper = ctx;
+    struct ArrowArrayStream *stream = &wrapper->stream;
+    if (stream->release != NULL) {
+        stream->release(stream);
+    }
+    xfree(wrapper);
+}
+/* Allocator: wraps a zero-filled stream struct, i.e. one whose callbacks and
+ * private_data are all NULL until rbduckdb_create_arrow_array_stream fills it. */
+static VALUE allocate(VALUE klass) {
+    rubyDuckDBArrowArrayStream *wrapper;
+    wrapper = xcalloc((size_t)1, sizeof(rubyDuckDBArrowArrayStream));
+    return TypedData_Wrap_Struct(klass, &arrow_array_stream_data_type, wrapper);
+}
+/* Size callback: reports the fixed size of the wrapper struct. */
+static size_t memsize(const void *p) {
+    (void)p; /* the reported size does not depend on the instance */
+    return sizeof(rubyDuckDBArrowArrayStream);
+}
+/* Context memory is managed with plain malloc/free because the release
+ * callback may run outside Ruby's memory bookkeeping.
+ *
+ * Replaces ctx->last_error with a heap copy of msg. When msg is NULL or the
+ * allocation fails, last_error ends up NULL. The copy is made before the old
+ * buffer is freed so that msg may safely alias the current last_error. */
+static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg) {
+    char *copy = NULL;
+    if (msg != NULL) {
+        size_t len = strlen(msg) + 1;
+        copy = malloc(len);
+        if (copy != NULL) {
+            memcpy(copy, msg, len);
+        }
+    }
+    free(ctx->last_error);
+    ctx->last_error = copy;
+}
+/* Translates a DuckDB error_data into a stream status code: 0 when there is
+ * no error, EIO (with the message stored via stream_set_error) otherwise.
+ * A non-NULL error_data is always destroyed before returning. */
+static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data) {
+    int status = 0;
+    if (error_data != NULL) {
+        if (duckdb_error_data_has_error(error_data)) {
+            stream_set_error(ctx, duckdb_error_data_message(error_data));
+            status = EIO;
+        }
+        duckdb_destroy_error_data(&error_data);
+    }
+    return status;
+}
+/* get_schema callback: collects the logical type and name of every result
+ * column and converts them into an Arrow schema written to out.
+ * Returns 0 on success, EINVAL when the stream has already been released,
+ * ENOMEM when the temporary arrays cannot be allocated, or the status from
+ * stream_check_error when the DuckDB conversion reports an error. */
+static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
+    arrowArrayStreamContext *ctx;
+    duckdb_error_data error_data;
+    duckdb_logical_type *types;
+    const char **names;
+    idx_t column_count;
+    idx_t i;
+    /* Same released-stream test as stream_release; without it a call on a
+     * released stream would dereference a NULL or stale context. */
+    if (stream == NULL || stream->release == NULL || stream->private_data == NULL) {
+        return EINVAL;
+    }
+    ctx = (arrowArrayStreamContext *)stream->private_data;
+    column_count = duckdb_column_count(&(ctx->presult_ctx->result));
+    types = calloc((size_t)column_count, sizeof(duckdb_logical_type));
+    names = calloc((size_t)column_count, sizeof(const char *));
+    /* calloc may legitimately return NULL for a zero-column result, so a NULL
+     * pointer only counts as a failure when there are columns to store. */
+    if (column_count > 0 && (types == NULL || names == NULL)) {
+        free(types);
+        free(names);
+        stream_set_error(ctx, "failed to allocate memory for Arrow schema conversion");
+        return ENOMEM;
+    }
+    for (i = 0; i < column_count; i++) {
+        types[i] = duckdb_column_logical_type(&(ctx->presult_ctx->result), i);
+        names[i] = duckdb_column_name(&(ctx->presult_ctx->result), i);
+    }
+    error_data = duckdb_to_arrow_schema(ctx->arrow_options, types, names, column_count, out);
+    /* The logical types are destroyed here after the conversion; the names
+     * are not freed individually, only the array that holds them. */
+    for (i = 0; i < column_count; i++) {
+        duckdb_destroy_logical_type(&types[i]);
+    }
+    free(types);
+    free(names);
+    return stream_check_error(ctx, error_data);
+}
+/* get_next callback: fetches the next chunk of the result and converts it
+ * into an Arrow array written to out. Returns 0 on success (including end of
+ * stream), EINVAL when the stream has already been released, or the status
+ * from stream_check_error when the conversion reports an error. */
+static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out) {
+    arrowArrayStreamContext *ctx;
+    duckdb_data_chunk chunk;
+    duckdb_error_data error_data;
+    /* Same released-stream test as stream_release; without it a call on a
+     * released stream would dereference a NULL or stale context. */
+    if (stream == NULL || stream->release == NULL || stream->private_data == NULL) {
+        return EINVAL;
+    }
+    ctx = (arrowArrayStreamContext *)stream->private_data;
+    chunk = duckdb_fetch_chunk(ctx->presult_ctx->result);
+    if (chunk == NULL) {
+        /* End of stream: a released (release == NULL) array. */
+        memset(out, 0, sizeof(struct ArrowArray));
+        return 0;
+    }
+    /* duckdb_data_chunk_to_arrow copies the chunk into Arrow-owned buffers,
+     * so the chunk can be destroyed right after conversion. */
+    error_data = duckdb_data_chunk_to_arrow(ctx->arrow_options, chunk, out);
+    duckdb_destroy_data_chunk(&chunk);
+    return stream_check_error(ctx, error_data);
+}
+static const char *stream_get_last_error(struct ArrowArrayStream *stream) {
+    arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
+    return ctx == NULL ? NULL : ctx->last_error;
+}
+/* release callback: drops the reference on the result, destroys the Arrow
+ * options, frees the context and marks the stream as released by clearing
+ * private_data and release. Does nothing for a NULL or already released
+ * stream. */
+static void stream_release(struct ArrowArrayStream *stream) {
+    arrowArrayStreamContext *stream_ctx;
+    if (stream == NULL) {
+        return;
+    }
+    if (stream->release == NULL) {
+        return;
+    }
+    stream_ctx = stream->private_data;
+    if (stream_ctx != NULL) {
+        rbduckdb_result_unref(stream_ctx->presult_ctx);
+        if (stream_ctx->arrow_options != NULL) {
+            duckdb_destroy_arrow_options(&stream_ctx->arrow_options);
+        }
+        free(stream_ctx->last_error);
+        free(stream_ctx);
+    }
+    stream->private_data = NULL;
+    stream->release = NULL;
+}
+/* Builds a DuckDB::ArrowArrayStream for the given DuckDB::Result object.
+ * Allocates the malloc-managed stream context, takes a reference on the
+ * result struct, fetches the result's Arrow options and installs the stream
+ * callbacks. Raises NoMemoryError when the context cannot be allocated. */
+VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult) {
+    VALUE stream_obj;
+    rubyDuckDBArrowArrayStream *wrapper;
+    rubyDuckDBResult *result_ctx;
+    arrowArrayStreamContext *stream_ctx;
+    struct ArrowArrayStream *stream;
+    stream_obj = allocate(cDuckDBArrowArrayStream);
+    TypedData_Get_Struct(stream_obj, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, wrapper);
+    result_ctx = rbduckdb_get_struct_result(oDuckDBResult);
+    stream_ctx = calloc((size_t)1, sizeof(arrowArrayStreamContext));
+    if (stream_ctx == NULL) {
+        rb_raise(rb_eNoMemError, "failed to allocate ArrowArrayStream context");
+    }
+    rbduckdb_result_ref(result_ctx);
+    stream_ctx->presult_ctx = result_ctx;
+    stream_ctx->arrow_options = duckdb_result_get_arrow_options(&(result_ctx->result));
+    stream = &wrapper->stream;
+    stream->get_schema = stream_get_schema;
+    stream->get_next = stream_get_next;
+    stream->get_last_error = stream_get_last_error;
+    stream->release = stream_release;
+    stream->private_data = stream_ctx;
+    return stream_obj;
+}
+/*
+ *  call-seq:
+ *    stream.to_i -> Integer
+ *
+ *  Returns the address of the underlying C <code>struct ArrowArrayStream</code>.
+ *  Arrow consumers such as red-arrow accept this address directly:
+ *
+ *    reader = Arrow::RecordBatchReader.import(stream.to_i)
+ */
+static VALUE arrow_array_stream_to_i(VALUE self) {
+    rubyDuckDBArrowArrayStream *wrapper;
+    uintptr_t address;
+    TypedData_Get_Struct(self, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, wrapper);
+    /* The address of the embedded stream struct, widened for ULL2NUM. */
+    address = (uintptr_t)&(wrapper->stream);
+    return ULL2NUM((unsigned long long)address);
+}
+/*
+ *  call-seq:
+ *    stream.arrow_c_stream -> self
+ *
+ *  Returns the receiver unchanged. Defined so that the stream object itself
+ *  satisfies the Arrow C stream protocol used by ruby-polars and others.
+ */
+static VALUE arrow_array_stream_arrow_c_stream(VALUE self) {
+    return self;
+}
+/* Defines the ArrowArrayStream class under mDuckDB, installs its allocator
+ * and registers the public to_i and arrow_c_stream methods. */
+void rbduckdb_init_arrow_array_stream(void) {
+    /* NOTE(review): the #if 0 block below is never compiled; presumably it
+     * exists so documentation tooling can resolve mDuckDB — confirm. */
+#if 0
+    VALUE mDuckDB = rb_define_module("DuckDB");
+#endif
+    cDuckDBArrowArrayStream = rb_define_class_under(mDuckDB, "ArrowArrayStream", rb_cObject);
+    rb_define_alloc_func(cDuckDBArrowArrayStream, allocate);
+    rb_define_method(cDuckDBArrowArrayStream, "to_i", arrow_array_stream_to_i, 0);
+    rb_define_method(cDuckDBArrowArrayStream, "arrow_c_stream", arrow_array_stream_arrow_c_stream, 0);
+}

data/ext/duckdb/arrow_array_stream.h ADDED Viewed

@@ -0,0 +1,61 @@
+#ifndef RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
+#define RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
+/*
+ * Canonical Arrow C Data Interface and Arrow C Stream Interface definitions.
+ * https://arrow.apache.org/docs/format/CDataInterface.html
+ * duckdb.h only forward-declares these structs.
+ */
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+/* Schema description struct of the Arrow C Data Interface. The field order
+ * is an ABI shared with other Arrow implementations and must not be changed;
+ * see the specification linked at the top of this header for field meanings. */
+struct ArrowSchema {
+    const char *format;
+    const char *name;
+    const char *metadata;
+    int64_t flags;
+    int64_t n_children;
+    struct ArrowSchema **children;
+    struct ArrowSchema *dictionary;
+    void (*release)(struct ArrowSchema *);
+    void *private_data;
+};
+/* Array data struct of the Arrow C Data Interface. The field order is an ABI
+ * shared with other Arrow implementations and must not be changed; see the
+ * specification linked at the top of this header for field meanings. */
+struct ArrowArray {
+    int64_t length;
+    int64_t null_count;
+    int64_t offset;
+    int64_t n_buffers;
+    int64_t n_children;
+    const void **buffers;
+    struct ArrowArray **children;
+    struct ArrowArray *dictionary;
+    /* A NULL release marks the array as released. */
+    void (*release)(struct ArrowArray *);
+    void *private_data;
+};
+#endif /* ARROW_C_DATA_INTERFACE */
+#ifndef ARROW_C_STREAM_INTERFACE
+#define ARROW_C_STREAM_INTERFACE
+/* Stream struct of the Arrow C Stream Interface: a set of callbacks plus an
+ * opaque producer-owned pointer. The field order is an ABI and must not be
+ * changed. */
+struct ArrowArrayStream {
+    /* Writes the stream's schema to out; returns an int status code. */
+    int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out);
+    /* Writes the next array to out; returns an int status code. */
+    int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out);
+    /* Returns a message string for the stream, or NULL. */
+    const char *(*get_last_error)(struct ArrowArrayStream *);
+    /* Releases the stream; a NULL release marks the stream as released. */
+    void (*release)(struct ArrowArrayStream *);
+    /* Opaque pointer owned by the stream producer. */
+    void *private_data;
+};
+#endif /* ARROW_C_STREAM_INTERFACE */
+void rbduckdb_init_arrow_array_stream(void);
+VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult);
+#endif