duckdb 1.5.3.0 → 1.5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/README.md +52 -0
  4. data/ext/duckdb/aggregate_function.c +0 -1
  5. data/ext/duckdb/appender.c +17 -0
  6. data/ext/duckdb/arrow_array_stream.c +226 -0
  7. data/ext/duckdb/arrow_array_stream.h +61 -0
  8. data/ext/duckdb/arrow_import.c +165 -0
  9. data/ext/duckdb/arrow_import.h +6 -0
  10. data/ext/duckdb/blob.c +1 -1
  11. data/ext/duckdb/blob.h +1 -2
  12. data/ext/duckdb/config.c +1 -1
  13. data/ext/duckdb/config.h +1 -1
  14. data/ext/duckdb/connection.c +3 -3
  15. data/ext/duckdb/converter.h +1 -0
  16. data/ext/duckdb/conveter.c +39 -9
  17. data/ext/duckdb/data_chunk.c +10 -0
  18. data/ext/duckdb/data_chunk.h +1 -0
  19. data/ext/duckdb/duckdb.c +13 -11
  20. data/ext/duckdb/error.c +1 -1
  21. data/ext/duckdb/error.h +1 -3
  22. data/ext/duckdb/function_executor.c +308 -2
  23. data/ext/duckdb/function_executor.h +44 -0
  24. data/ext/duckdb/prepared_statement.c +21 -0
  25. data/ext/duckdb/result.c +49 -3
  26. data/ext/duckdb/result.h +11 -0
  27. data/ext/duckdb/ruby-duckdb.h +3 -0
  28. data/ext/duckdb/scalar_function.c +97 -29
  29. data/ext/duckdb/scalar_function.h +2 -4
  30. data/ext/duckdb/scalar_function_bind_info.c +13 -13
  31. data/ext/duckdb/scalar_function_bind_info.h +1 -1
  32. data/ext/duckdb/scalar_function_set.c +9 -9
  33. data/ext/duckdb/scalar_function_set.h +2 -2
  34. data/ext/duckdb/table_description.c +19 -19
  35. data/ext/duckdb/table_description.h +1 -1
  36. data/ext/duckdb/table_function.c +94 -28
  37. data/ext/duckdb/table_function.h +2 -2
  38. data/ext/duckdb/table_function_bind_info.c +20 -20
  39. data/ext/duckdb/table_function_bind_info.h +2 -2
  40. data/ext/duckdb/table_function_function_info.c +5 -5
  41. data/ext/duckdb/table_function_function_info.h +2 -2
  42. data/ext/duckdb/table_function_init_info.c +70 -5
  43. data/ext/duckdb/table_function_init_info.h +2 -2
  44. data/lib/duckdb/appender.rb +23 -0
  45. data/lib/duckdb/arrow_array_stream.rb +33 -0
  46. data/lib/duckdb/connection.rb +54 -0
  47. data/lib/duckdb/prepared_statement.rb +17 -0
  48. data/lib/duckdb/version.rb +1 -1
  49. data/lib/duckdb.rb +1 -0
  50. metadata +6 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4226fa5e46b5d23fec1fd817577faa2329dced7fa67cedacaa75140849c2de16
4
- data.tar.gz: 931c46acf582c8d01057c7a5b81f8356d430d1ed44cfad85ff79fd16a1935664
3
+ metadata.gz: c9c37c59f2e64ac10b72765250ec674ef40e82d83a7499a6c9e1117ee8e9e9d0
4
+ data.tar.gz: e9510873a0de444b35bb7c3de2d26f8d9a9ebcd01466a95383fa16d38d277e97
5
5
  SHA512:
6
- metadata.gz: e41cee0179f52dab024e361a69725468a7f088a6e5314df76b1d4d5d2b216b687f075c6873a4d294026e29466e99223b79653c6391a8ddf44a574dc0ec1e633e
7
- data.tar.gz: 49c1f2218424af2b13009fff358de4ace4050385323d5a6e06dee4c3b301fe8ee4855abbcbd944593b1ada48ec59eabd6af1e963aed3e70085da7f74b9257fd6
6
+ metadata.gz: 2ab204dc5d6034b846d3267ca78fd4d47f75f000e3d6c97e5673ca9791618c84e7130230cd1ad7634858ff25fd5cc5952bac500263ef228225767a730684749c
7
+ data.tar.gz: f55e20a3dc6a97aea0606dca4b047746da44941c4f84145a48c45248d150fd58ef800ca617adc7921ae6418ac0f8275bf29e24405f3cdbf2e917377029b8641f
data/CHANGELOG.md CHANGED
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  # Unreleased
6
6
 
7
+ # 1.5.4.0 - 2026-06-20
8
+ - bump up DuckDB 1.5.4 and 1.4.5 on CI.
9
+ - add experimental `DuckDB::Result#arrow_c_stream` returning `DuckDB::ArrowArrayStream` to export a query result as an Arrow C stream (Arrow C Data Interface). The stream can be consumed directly by ruby-polars (`Polars::DataFrame.new(result)`) and red-arrow (`Arrow::RecordBatchReader.import(stream.to_i)`).
10
+ - add experimental `DuckDB::Connection#append_arrow(table, producer)` to import an Arrow producer (any object responding to `#arrow_c_stream`, such as a Polars `DataFrame` or a `DuckDB::Result`) into an existing table, returning the number of rows appended.
11
+ - drop Ruby 3.2.
12
+ - add `DuckDB::TableFunction::InitInfo#max_threads=` (and `#set_max_threads`) to hint DuckDB how many worker threads can execute a custom table function concurrently.
13
+ - add `DuckDB::TableFunction::InitInfo#column_count` to get the number of projected result columns of a custom table function scan.
14
+ - add `DuckDB::TableFunction::InitInfo#column_index` to get the source column index of a given projected result column of a custom table function scan.
15
+ - add `DuckDB::Appender#append_uuid(value)` to append a UUID value to a `UUID` column. `value` must be a String in canonical UUID format (`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`).
16
+ - add `DuckDB::PreparedStatement#bind_uuid(index, value)` to bind a UUID parameter. `value` must be a String in canonical UUID format.
17
+
7
18
  # 1.5.3.0 - 2026-05-24
8
19
  - bump up DuckDB 1.5.3 on CI.
9
20
  - add `DuckDB::AggregateFunctionSet` class and `DuckDB::Connection#register_aggregate_function_set` to register multiple overloads of a custom aggregate function under one SQL name.
data/README.md CHANGED
@@ -266,6 +266,58 @@ res.first.first # => 4
266
266
 
267
267
  Set `DuckDB.default_timezone` to control how TIMESTAMP and TIME values without time zone are converted to Ruby `Time` objects. The default is `:local`, but you can use `:utc` for UTC conversion.
268
268
 
269
+ ### Arrow interop (experimental)
270
+
271
+ `DuckDB::Result#arrow_c_stream` exports a query result as an Arrow C stream
272
+ ([Arrow C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html)).
273
+ The returned `DuckDB::ArrowArrayStream` satisfies the Ruby Arrow C stream
274
+ protocol (`#arrow_c_stream` / `#to_i`), so query results can be handed to
275
+ Arrow consumers in columnar form, without converting each row to Ruby objects.
276
+
277
+ With [ruby-polars](https://github.com/ankane/ruby-polars), no glue code is needed:
278
+
279
+ ```ruby
280
+ result = con.query('SELECT * FROM users')
281
+ df = Polars::DataFrame.new(result)
282
+ ```
283
+
284
+ With [red-arrow](https://github.com/apache/arrow/tree/main/ruby/red-arrow),
285
+ pass the stream address to `Arrow::RecordBatchReader.import`:
286
+
287
+ ```ruby
288
+ result = con.query('SELECT * FROM users')
289
+ reader = Arrow::RecordBatchReader.import(result.arrow_c_stream.to_i)
290
+ ```
291
+
292
+ The consumer takes ownership of the stream's contents, so a result can be
293
+ exported only once; exporting the same result again raises `DuckDB::Error`.
294
+
295
+ In the other direction, `DuckDB::Connection#append_arrow` imports an Arrow
296
+ producer into an existing table. Any object responding to `#arrow_c_stream`
297
+ works as the producer — for example a Polars `DataFrame`, or another
298
+ `DuckDB::Result`:
299
+
300
+ ```ruby
301
+ con.query('CREATE TABLE users (id BIGINT, name VARCHAR)')
302
+ rows = con.append_arrow('users', polars_df) # => number of rows appended
303
+ con.query('SELECT * FROM users').to_a
304
+ ```
305
+
306
+ The producer's columns must line up with the table's columns by count and
307
+ position. DuckDB casts compatible column types (e.g. INTEGER into a BIGINT
308
+ column); a type that cannot be cast raises `DuckDB::Error`. `append_arrow` is
309
+ not transactional — wrap it in your own transaction if you need all-or-nothing.
310
+
311
+ These features are **experimental**: they are built on DuckDB's unstable Arrow
312
+ C API and may change in any minor release.
313
+
314
+ Note: [red-arrow-format](https://github.com/apache/arrow/tree/main/ruby/red-arrow-format)
315
+ (the pure-Ruby Arrow implementation) supports only the Arrow IPC
316
+ serialization format, not the C Data Interface. To exchange data with it,
317
+ write/read Arrow IPC files through DuckDB's
318
+ [arrow community extension](https://duckdb.org/community_extensions/extensions/arrow.html)
319
+ (`COPY ... TO 'data.arrows'` / `read_arrow(...)`).
320
+
269
321
  ## Versioning and DuckDB support
270
322
 
271
323
  The first three digits of the gem version track the DuckDB release that the
@@ -737,7 +737,6 @@ void rbduckdb_init_aggregate_function(void) {
737
737
  rb_define_alloc_func(cDuckDBAggregateFunction, allocate);
738
738
  rb_define_method(cDuckDBAggregateFunction, "initialize", aggregate_function_initialize, 0);
739
739
  rb_define_method(cDuckDBAggregateFunction, "set_name", aggregate_function_set_name, 1);
740
- rb_define_method(cDuckDBAggregateFunction, "name=", aggregate_function_set_name, 1);
741
740
  rb_define_private_method(cDuckDBAggregateFunction, "_set_return_type", aggregate_function__set_return_type, 1);
742
741
  rb_define_private_method(cDuckDBAggregateFunction, "_add_parameter", aggregate_function__add_parameter, 1);
743
742
  rb_define_private_method(cDuckDBAggregateFunction, "_set_init", aggregate_function__set_init, 0);
@@ -35,6 +35,7 @@ static VALUE appender__append_time(VALUE self, VALUE hour, VALUE min, VALUE sec,
35
35
  static VALUE appender__append_timestamp(VALUE self, VALUE year, VALUE month, VALUE day, VALUE hour, VALUE min, VALUE sec, VALUE micros);
36
36
  static VALUE appender__append_hugeint(VALUE self, VALUE lower, VALUE upper);
37
37
  static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper);
38
+ static VALUE appender__append_uuid(VALUE self, VALUE val);
38
39
  static VALUE appender__append_value(VALUE self, VALUE val);
39
40
  static VALUE appender__append_data_chunk(VALUE self, VALUE chunk);
40
41
  static VALUE appender__append_default_to_chunk(VALUE self, VALUE chunk, VALUE col, VALUE row);
@@ -453,6 +454,21 @@ static VALUE appender__append_uhugeint(VALUE self, VALUE lower, VALUE upper) {
453
454
  return state_to_rbool(duckdb_append_uhugeint(ctx->appender, uhugeint));
454
455
  }
455
456
 
457
+ /* :nodoc: */
458
+ static VALUE appender__append_uuid(VALUE self, VALUE val) {
459
+ rubyDuckDBAppender *ctx;
460
+ duckdb_uhugeint uhugeint;
461
+ duckdb_value uuid_val;
462
+ duckdb_state state;
463
+
464
+ TypedData_Get_Struct(self, rubyDuckDBAppender, &appender_data_type, ctx);
465
+ rbduckdb_uuid_str_to_uhugeint(val, &uhugeint);
466
+ uuid_val = duckdb_create_uuid(uhugeint);
467
+ state = duckdb_append_value(ctx->appender, uuid_val);
468
+ duckdb_destroy_value(&uuid_val);
469
+ return state_to_rbool(state);
470
+ }
471
+
456
472
  /* :nodoc: */
457
473
  static VALUE appender__append_value(VALUE self, VALUE val) {
458
474
  rubyDuckDBAppender *ctx;
@@ -578,6 +594,7 @@ void rbduckdb_init_appender(void) {
578
594
  rb_define_private_method(cDuckDBAppender, "_append_timestamp", appender__append_timestamp, 7);
579
595
  rb_define_private_method(cDuckDBAppender, "_append_hugeint", appender__append_hugeint, 2);
580
596
  rb_define_private_method(cDuckDBAppender, "_append_uhugeint", appender__append_uhugeint, 2);
597
+ rb_define_private_method(cDuckDBAppender, "_append_uuid", appender__append_uuid, 1);
581
598
  rb_define_private_method(cDuckDBAppender, "_append_value", appender__append_value, 1);
582
599
  rb_define_private_method(cDuckDBAppender, "_append_data_chunk", appender__append_data_chunk, 1);
583
600
  rb_define_private_method(cDuckDBAppender, "_append_default_to_chunk", appender__append_default_to_chunk, 3);
@@ -0,0 +1,226 @@
1
+ #include "ruby-duckdb.h"
2
+ #include <errno.h>
3
+
4
+ static VALUE cDuckDBArrowArrayStream;
5
+
6
+ typedef struct {
7
+ struct ArrowArrayStream stream;
8
+ } rubyDuckDBArrowArrayStream;
9
+
10
+ /*
11
+ * Heap-allocated context referenced by stream.private_data. Consumers may
12
+ * move the stream struct contents out and keep using the callbacks after
13
+ * the Ruby DuckDB::ArrowArrayStream object is gone, so this context is
14
+ * freed only by the stream release callback, and it holds a reference on
15
+ * the result struct (rbduckdb_result_ref) until then. The release callback
16
+ * must not call any Ruby API: it can run during GC sweep (via deallocate
17
+ * of an unconsumed stream) or from a non-Ruby thread.
18
+ */
19
+ typedef struct {
20
+ rubyDuckDBResult *presult_ctx;
21
+ duckdb_arrow_options arrow_options;
22
+ char *last_error;
23
+ } arrowArrayStreamContext;
24
+
25
+ static void deallocate(void *ctx);
26
+ static VALUE allocate(VALUE klass);
27
+ static size_t memsize(const void *p);
28
+ static VALUE arrow_array_stream_to_i(VALUE self);
29
+ static VALUE arrow_array_stream_arrow_c_stream(VALUE self);
30
+ static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg);
31
+ static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data);
32
+ static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out);
33
+ static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out);
34
+ static const char *stream_get_last_error(struct ArrowArrayStream *stream);
35
+ static void stream_release(struct ArrowArrayStream *stream);
36
+
37
+ static const rb_data_type_t arrow_array_stream_data_type = {
38
+ "DuckDB/ArrowArrayStream",
39
+ {NULL, deallocate, memsize,},
40
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
41
+ };
42
+
43
+ static void deallocate(void *ctx) {
44
+ rubyDuckDBArrowArrayStream *p = (rubyDuckDBArrowArrayStream *)ctx;
45
+
46
+ if (p->stream.release != NULL) {
47
+ p->stream.release(&(p->stream));
48
+ }
49
+ xfree(p);
50
+ }
51
+
52
+ static VALUE allocate(VALUE klass) {
53
+ rubyDuckDBArrowArrayStream *ctx = xcalloc((size_t)1, sizeof(rubyDuckDBArrowArrayStream));
54
+ return TypedData_Wrap_Struct(klass, &arrow_array_stream_data_type, ctx);
55
+ }
56
+
57
+ static size_t memsize(const void *p) {
58
+ return sizeof(rubyDuckDBArrowArrayStream);
59
+ }
60
+
61
+ /* Context memory is managed with plain malloc/free because the release
62
+ * callback may run outside Ruby's memory bookkeeping. */
63
+ static void stream_set_error(arrowArrayStreamContext *ctx, const char *msg) {
64
+ size_t len = strlen(msg) + 1;
65
+
66
+ free(ctx->last_error);
67
+ ctx->last_error = malloc(len);
68
+ if (ctx->last_error != NULL) {
69
+ memcpy(ctx->last_error, msg, len);
70
+ }
71
+ }
72
+
73
+ static int stream_check_error(arrowArrayStreamContext *ctx, duckdb_error_data error_data) { /* Maps a duckdb_error_data to a stream callback return code: 0 when no error, EIO otherwise. Destroys error_data when non-NULL. */
74
+ if (error_data == NULL) { /* NULL handle: nothing to report and nothing to destroy */
75
+ return 0;
76
+ }
77
+ if (!duckdb_error_data_has_error(error_data)) { /* handle exists but carries no error */
78
+ duckdb_destroy_error_data(&error_data);
79
+ return 0;
80
+ }
81
+ stream_set_error(ctx, duckdb_error_data_message(error_data)); /* stream_set_error copies the message, so it stays valid after error_data is destroyed */
82
+ duckdb_destroy_error_data(&error_data);
83
+ return EIO;
84
+ }
85
+
86
+ static int stream_get_schema(struct ArrowArrayStream *stream, struct ArrowSchema *out) {
87
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
88
+ duckdb_error_data error_data;
89
+ duckdb_logical_type *types;
90
+ const char **names;
91
+ idx_t column_count;
92
+ idx_t i;
93
+
94
+ column_count = duckdb_column_count(&(ctx->presult_ctx->result));
95
+ types = calloc((size_t)column_count, sizeof(duckdb_logical_type));
96
+ names = calloc((size_t)column_count, sizeof(const char *));
97
+ if (column_count > 0 && (types == NULL || names == NULL)) {
98
+ free(types);
99
+ free(names);
100
+ stream_set_error(ctx, "failed to allocate memory for Arrow schema conversion");
101
+ return ENOMEM;
102
+ }
103
+ for (i = 0; i < column_count; i++) {
104
+ types[i] = duckdb_column_logical_type(&(ctx->presult_ctx->result), i);
105
+ names[i] = duckdb_column_name(&(ctx->presult_ctx->result), i);
106
+ }
107
+
108
+ error_data = duckdb_to_arrow_schema(ctx->arrow_options, types, names, column_count, out);
109
+
110
+ for (i = 0; i < column_count; i++) {
111
+ duckdb_destroy_logical_type(&types[i]);
112
+ }
113
+ free(types);
114
+ free(names);
115
+ return stream_check_error(ctx, error_data);
116
+ }
117
+
118
+ static int stream_get_next(struct ArrowArrayStream *stream, struct ArrowArray *out) {
119
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
120
+ duckdb_data_chunk chunk;
121
+ duckdb_error_data error_data;
122
+
123
+ chunk = duckdb_fetch_chunk(ctx->presult_ctx->result);
124
+ if (chunk == NULL) {
125
+ /* End of stream: a released (release == NULL) array. */
126
+ memset(out, 0, sizeof(struct ArrowArray));
127
+ return 0;
128
+ }
129
+ /* duckdb_data_chunk_to_arrow copies the chunk into Arrow-owned buffers,
130
+ * so the chunk can be destroyed right after conversion. */
131
+ error_data = duckdb_data_chunk_to_arrow(ctx->arrow_options, chunk, out);
132
+ duckdb_destroy_data_chunk(&chunk);
133
+ return stream_check_error(ctx, error_data);
134
+ }
135
+
136
+ static const char *stream_get_last_error(struct ArrowArrayStream *stream) {
137
+ arrowArrayStreamContext *ctx = (arrowArrayStreamContext *)stream->private_data;
138
+
139
+ return ctx == NULL ? NULL : ctx->last_error;
140
+ }
141
+
142
+ static void stream_release(struct ArrowArrayStream *stream) { /* ArrowArrayStream.release callback: frees the heap context and marks the stream released. Uses no Ruby API. */
143
+ arrowArrayStreamContext *ctx;
144
+
145
+ if (stream == NULL || stream->release == NULL) { /* already released (or no stream): no-op, so a second call is harmless */
146
+ return;
147
+ }
148
+ ctx = (arrowArrayStreamContext *)stream->private_data;
149
+ if (ctx != NULL) {
150
+ rbduckdb_result_unref(ctx->presult_ctx); /* drops the reference taken by rbduckdb_result_ref in rbduckdb_create_arrow_array_stream */
151
+ if (ctx->arrow_options != NULL) {
152
+ duckdb_destroy_arrow_options(&(ctx->arrow_options));
153
+ }
154
+ free(ctx->last_error); /* allocated with malloc in stream_set_error; free(NULL) is a no-op */
155
+ free(ctx); /* allocated with calloc in rbduckdb_create_arrow_array_stream */
156
+ }
157
+ stream->private_data = NULL;
158
+ stream->release = NULL; /* a NULL release pointer is how this file recognizes a released stream */
159
+ }
160
+
161
+ VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult) { /* Builds a DuckDB::ArrowArrayStream whose callbacks read from the given DuckDB::Result. */
162
+ VALUE obj;
163
+ rubyDuckDBArrowArrayStream *p;
164
+ rubyDuckDBResult *presult_ctx;
165
+ arrowArrayStreamContext *ctx;
166
+
167
+ obj = allocate(cDuckDBArrowArrayStream); /* zero-filled wrapper: stream.release stays NULL until assigned below */
168
+ TypedData_Get_Struct(obj, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, p);
169
+ presult_ctx = rbduckdb_get_struct_result(oDuckDBResult);
170
+
171
+ ctx = calloc((size_t)1, sizeof(arrowArrayStreamContext)); /* plain calloc, not xcalloc: stream_release frees it with free() */
172
+ if (ctx == NULL) {
173
+ rb_raise(rb_eNoMemError, "failed to allocate ArrowArrayStream context");
174
+ }
175
+
176
+ rbduckdb_result_ref(presult_ctx); /* paired with rbduckdb_result_unref in stream_release */
177
+ ctx->presult_ctx = presult_ctx;
178
+ ctx->arrow_options = duckdb_result_get_arrow_options(&(presult_ctx->result)); /* destroyed in stream_release when non-NULL */
179
+
180
+ p->stream.get_schema = stream_get_schema;
181
+ p->stream.get_next = stream_get_next;
182
+ p->stream.get_last_error = stream_get_last_error;
183
+ p->stream.release = stream_release;
184
+ p->stream.private_data = ctx; /* from here on ctx is owned by the stream and freed only by stream_release */
185
+
186
+ return obj;
187
+ }
188
+
189
+ /*
190
+ * call-seq:
191
+ * stream.to_i -> Integer
192
+ *
193
+ * Returns the address of the underlying C <code>struct ArrowArrayStream</code>.
194
+ * Arrow consumers such as red-arrow accept this address directly:
195
+ *
196
+ * reader = Arrow::RecordBatchReader.import(stream.to_i)
197
+ */
198
+ static VALUE arrow_array_stream_to_i(VALUE self) {
199
+ rubyDuckDBArrowArrayStream *p;
200
+
201
+ TypedData_Get_Struct(self, rubyDuckDBArrowArrayStream, &arrow_array_stream_data_type, p);
202
+ return ULL2NUM((unsigned long long)(uintptr_t)&(p->stream));
203
+ }
204
+
205
+ /*
206
+ * call-seq:
207
+ * stream.arrow_c_stream -> self
208
+ *
209
+ * Returns self. Defined so that the stream object itself satisfies the
210
+ * Arrow C stream protocol used by ruby-polars and others.
211
+ */
212
+ static VALUE arrow_array_stream_arrow_c_stream(VALUE self) {
213
+ return self;
214
+ }
215
+
216
+ void rbduckdb_init_arrow_array_stream(void) {
217
+ #if 0
218
+ VALUE mDuckDB = rb_define_module("DuckDB");
219
+ #endif
220
+ cDuckDBArrowArrayStream = rb_define_class_under(mDuckDB, "ArrowArrayStream", rb_cObject);
221
+
222
+ rb_define_alloc_func(cDuckDBArrowArrayStream, allocate);
223
+
224
+ rb_define_method(cDuckDBArrowArrayStream, "to_i", arrow_array_stream_to_i, 0);
225
+ rb_define_method(cDuckDBArrowArrayStream, "arrow_c_stream", arrow_array_stream_arrow_c_stream, 0);
226
+ }
@@ -0,0 +1,61 @@
1
+ #ifndef RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
2
+ #define RUBY_DUCKDB_ARROW_ARRAY_STREAM_H
3
+
4
+ /*
5
+ * Canonical Arrow C Data Interface and Arrow C Stream Interface definitions.
6
+ * https://arrow.apache.org/docs/format/CDataInterface.html
7
+ * duckdb.h only forward-declares these structs.
8
+ */
9
+ #ifndef ARROW_C_DATA_INTERFACE
10
+ #define ARROW_C_DATA_INTERFACE
11
+
12
+ #define ARROW_FLAG_DICTIONARY_ORDERED 1
13
+ #define ARROW_FLAG_NULLABLE 2
14
+ #define ARROW_FLAG_MAP_KEYS_SORTED 4
15
+
16
+ struct ArrowSchema {
17
+ const char *format;
18
+ const char *name;
19
+ const char *metadata;
20
+ int64_t flags;
21
+ int64_t n_children;
22
+ struct ArrowSchema **children;
23
+ struct ArrowSchema *dictionary;
24
+
25
+ void (*release)(struct ArrowSchema *);
26
+ void *private_data;
27
+ };
28
+
29
+ struct ArrowArray {
30
+ int64_t length;
31
+ int64_t null_count;
32
+ int64_t offset;
33
+ int64_t n_buffers;
34
+ int64_t n_children;
35
+ const void **buffers;
36
+ struct ArrowArray **children;
37
+ struct ArrowArray *dictionary;
38
+
39
+ void (*release)(struct ArrowArray *);
40
+ void *private_data;
41
+ };
42
+
43
+ #endif /* ARROW_C_DATA_INTERFACE */
44
+
45
+ #ifndef ARROW_C_STREAM_INTERFACE
46
+ #define ARROW_C_STREAM_INTERFACE
47
+
48
+ struct ArrowArrayStream {
49
+ int (*get_schema)(struct ArrowArrayStream *, struct ArrowSchema *out);
50
+ int (*get_next)(struct ArrowArrayStream *, struct ArrowArray *out);
51
+ const char *(*get_last_error)(struct ArrowArrayStream *);
52
+ void (*release)(struct ArrowArrayStream *);
53
+ void *private_data;
54
+ };
55
+
56
+ #endif /* ARROW_C_STREAM_INTERFACE */
57
+
58
+ void rbduckdb_init_arrow_array_stream(void);
59
+ VALUE rbduckdb_create_arrow_array_stream(VALUE oDuckDBResult);
60
+
61
+ #endif
@@ -0,0 +1,165 @@
1
+ #include "ruby-duckdb.h"
2
+
3
+ /*
4
+ * Internal helpers backing DuckDB::Connection#append_arrow. They consume an
5
+ * Arrow producer's struct ArrowArrayStream (given by its address) and convert
6
+ * each chunk into a DuckDB::DataChunk using DuckDB's unstable Arrow C API. The
7
+ * Ruby layer owns the loop, the appender lifecycle, and error handling; these
8
+ * primitives only do the raw-pointer / C-API work.
9
+ */
10
+
11
+ static VALUE cDuckDBArrowConvertedSchema;
12
+
13
+ typedef struct {
14
+ duckdb_arrow_converted_schema converted_schema;
15
+ } rubyDuckDBArrowConvertedSchema;
16
+
17
+ static void deallocate(void *ctx);
18
+ static VALUE allocate(VALUE klass);
19
+ static size_t memsize(const void *p);
20
+ static void raise_error_data(duckdb_error_data error_data);
21
+
22
+ static VALUE connection__arrow_converted_schema(VALUE self, VALUE address);
23
+ static VALUE connection__arrow_next_chunk(VALUE self, VALUE address, VALUE converted);
24
+ static VALUE connection__arrow_release(VALUE self, VALUE address);
25
+
26
+ static const rb_data_type_t arrow_converted_schema_data_type = {
27
+ "DuckDB/ArrowConvertedSchema",
28
+ {NULL, deallocate, memsize,},
29
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
30
+ };
31
+
32
+ static void deallocate(void *ctx) {
33
+ rubyDuckDBArrowConvertedSchema *p = (rubyDuckDBArrowConvertedSchema *)ctx;
34
+
35
+ if (p->converted_schema) {
36
+ duckdb_destroy_arrow_converted_schema(&(p->converted_schema));
37
+ }
38
+ xfree(p);
39
+ }
40
+
41
+ static VALUE allocate(VALUE klass) {
42
+ rubyDuckDBArrowConvertedSchema *ctx = xcalloc((size_t)1, sizeof(rubyDuckDBArrowConvertedSchema));
43
+ return TypedData_Wrap_Struct(klass, &arrow_converted_schema_data_type, ctx);
44
+ }
45
+
46
+ static size_t memsize(const void *p) {
47
+ return sizeof(rubyDuckDBArrowConvertedSchema);
48
+ }
49
+
50
+ static void raise_error_data(duckdb_error_data error_data) { /* Raises DuckDB::Error with the message held by error_data; returns normally when there is no error. Destroys error_data when non-NULL. */
51
+ VALUE message;
52
+
53
+ if (error_data == NULL) { /* NULL handle: nothing to raise and nothing to destroy */
54
+ return;
55
+ }
56
+ if (!duckdb_error_data_has_error(error_data)) { /* handle exists but carries no error */
57
+ duckdb_destroy_error_data(&error_data);
58
+ return;
59
+ }
60
+ message = rb_str_new_cstr(duckdb_error_data_message(error_data)); /* copy into a Ruby String before the handle is destroyed */
61
+ duckdb_destroy_error_data(&error_data); /* must happen before rb_raise, which does not return */
62
+ rb_raise(eDuckDBError, "%s", StringValueCStr(message));
63
+ }
64
+
65
+ static struct ArrowArrayStream *stream_from_address(VALUE address) {
66
+ return (struct ArrowArrayStream *)(uintptr_t)NUM2ULL(address);
67
+ }
68
+
69
+ /*
70
+ * :nodoc:
71
+ * Fetches the ArrowSchema from the producer stream at +address+ and converts
72
+ * it with duckdb_schema_from_arrow. Returns a DuckDB::ArrowConvertedSchema
73
+ * wrapping the converted schema. Raises DuckDB::Error when the address is
74
+ * NULL, when the producer fails to provide a schema, or when the conversion
75
+ * reports an error.
76
+ */
77
+ static VALUE connection__arrow_converted_schema(VALUE self, VALUE address) {
78
+ rubyDuckDBConnection *ctx;
79
+ struct ArrowArrayStream *stream;
80
+ struct ArrowSchema schema;
81
+ duckdb_arrow_converted_schema converted_schema = NULL;
82
+ duckdb_error_data error_data;
83
+ VALUE obj;
84
+ rubyDuckDBArrowConvertedSchema *schema_ctx;
85
+ int rc;
86
+
87
+ ctx = rbduckdb_get_struct_connection(self);
88
+ stream = stream_from_address(address);
89
+ if (stream == NULL) {
90
+ rb_raise(eDuckDBError, "Arrow producer returned a NULL stream");
91
+ }
92
+
93
+ /* Allocate the Ruby wrapper before acquiring the converted schema: the
94
+ * allocation can raise, and raising after duckdb_schema_from_arrow has
95
+ * succeeded would leak the converted schema. */
96
+ obj = allocate(cDuckDBArrowConvertedSchema);
97
+ TypedData_Get_Struct(obj, rubyDuckDBArrowConvertedSchema, &arrow_converted_schema_data_type, schema_ctx);
98
+
99
+ memset(&schema, 0, sizeof(schema));
100
+ rc = stream->get_schema(stream, &schema);
101
+ if (rc != 0) {
102
+ const char *err = stream->get_last_error(stream);
103
+ rb_raise(eDuckDBError, "failed to get Arrow schema: %s", err ? err : "unknown error");
104
+ }
105
+
106
+ error_data = duckdb_schema_from_arrow(ctx->con, &schema, &converted_schema);
107
+ /* The ArrowSchema is ours to release whether or not the conversion succeeded. */
108
+ if (schema.release != NULL) {
109
+ schema.release(&schema);
110
+ }
111
+ raise_error_data(error_data);
112
+
113
+ /* Nothing between here and the return can raise, so ownership of the
114
+ * converted schema passes to the wrapper (freed in deallocate). */
115
+ schema_ctx->converted_schema = converted_schema;
116
+ return obj;
117
+ }
104
+
105
+ /*
106
+ * :nodoc:
107
+ * Fetches the next ArrowArray from the producer stream at +address+ and
108
+ * converts it to a DuckDB::DataChunk using +converted+, the schema object
109
+ * returned by _arrow_converted_schema. Returns nil at end of stream.
110
+ * Raises DuckDB::Error when the address is NULL, when the producer reports
111
+ * an error, or when DuckDB cannot convert the array.
112
+ */
113
+ static VALUE connection__arrow_next_chunk(VALUE self, VALUE address, VALUE converted) {
114
+ rubyDuckDBConnection *ctx;
115
+ rubyDuckDBArrowConvertedSchema *schema_ctx;
116
+ struct ArrowArrayStream *stream;
117
+ struct ArrowArray array;
118
+ duckdb_data_chunk chunk = NULL;
119
+ duckdb_error_data error_data;
120
+ int rc;
121
+
122
+ ctx = rbduckdb_get_struct_connection(self);
123
+ TypedData_Get_Struct(converted, rubyDuckDBArrowConvertedSchema, &arrow_converted_schema_data_type, schema_ctx);
124
+ stream = stream_from_address(address);
125
+ /* Same guard as _arrow_converted_schema: a zero address must raise, not
126
+ * be dereferenced. */
127
+ if (stream == NULL) {
128
+ rb_raise(eDuckDBError, "Arrow producer returned a NULL stream");
129
+ }
130
+
131
+ memset(&array, 0, sizeof(array));
132
+ rc = stream->get_next(stream, &array);
133
+ if (rc != 0) {
134
+ const char *err = stream->get_last_error(stream);
135
+ rb_raise(eDuckDBError, "failed to get next Arrow chunk: %s", err ? err : "unknown error");
136
+ }
137
+ /* End of stream: a released array (release == NULL). */
138
+ if (array.release == NULL) {
139
+ return Qnil;
140
+ }
141
+
142
+ /* duckdb_data_chunk_from_arrow takes ownership of the array (nulls its
143
+ * release). On error before that, we still own it and must release it. */
144
+ error_data = duckdb_data_chunk_from_arrow(ctx->con, &array, schema_ctx->converted_schema, &chunk);
145
+ if (error_data != NULL && duckdb_error_data_has_error(error_data)) {
146
+ if (array.release != NULL) {
147
+ array.release(&array);
148
+ }
149
+ raise_error_data(error_data);
150
+ } else if (error_data != NULL) {
151
+ duckdb_destroy_error_data(&error_data);
152
+ }
153
+
154
+ return rbduckdb_create_data_chunk(chunk, true);
155
+ }
144
+
145
+ /* :nodoc: */
146
+ static VALUE connection__arrow_release(VALUE self, VALUE address) {
147
+ struct ArrowArrayStream *stream = stream_from_address(address);
148
+
149
+ if (stream != NULL && stream->release != NULL) {
150
+ stream->release(stream);
151
+ }
152
+ return Qnil;
153
+ }
154
+
155
+ void rbduckdb_init_arrow_import(void) {
156
+ #if 0
157
+ VALUE mDuckDB = rb_define_module("DuckDB");
158
+ #endif
159
+ cDuckDBArrowConvertedSchema = rb_define_class_under(mDuckDB, "ArrowConvertedSchema", rb_cObject);
160
+ rb_define_alloc_func(cDuckDBArrowConvertedSchema, allocate);
161
+
162
+ rb_define_private_method(cDuckDBConnection, "_arrow_converted_schema", connection__arrow_converted_schema, 1);
163
+ rb_define_private_method(cDuckDBConnection, "_arrow_next_chunk", connection__arrow_next_chunk, 2);
164
+ rb_define_private_method(cDuckDBConnection, "_arrow_release", connection__arrow_release, 1);
165
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef RUBY_DUCKDB_ARROW_IMPORT_H
2
+ #define RUBY_DUCKDB_ARROW_IMPORT_H
3
+
4
+ void rbduckdb_init_arrow_import(void);
5
+
6
+ #endif
data/ext/duckdb/blob.c CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  VALUE cDuckDBBlob;
4
4
 
5
- void rbduckdb_init_duckdb_blob(void) {
5
+ void rbduckdb_init_blob(void) {
6
6
  #if 0
7
7
  VALUE mDuckDB = rb_define_module("DuckDB");
8
8
  #endif
data/ext/duckdb/blob.h CHANGED
@@ -1,7 +1,6 @@
1
1
  #ifndef RUBY_DUCKDB_BLOB_H
2
2
  #define RUBY_DUCKDB_BLOB_H
3
3
 
4
- void rbduckdb_init_duckdb_blob(void);
4
+ void rbduckdb_init_blob(void);
5
5
 
6
6
  #endif
7
-
data/ext/duckdb/config.c CHANGED
@@ -79,7 +79,7 @@ static VALUE config_set_config(VALUE self, VALUE key, VALUE value) {
79
79
  return self;
80
80
  }
81
81
 
82
- void rbduckdb_init_duckdb_config(void) {
82
+ void rbduckdb_init_config(void) {
83
83
  #if 0
84
84
  VALUE mDuckDB = rb_define_module("DuckDB");
85
85
  #endif
data/ext/duckdb/config.h CHANGED
@@ -9,6 +9,6 @@ typedef struct _rubyDuckDBConfig rubyDuckDBConfig;
9
9
 
10
10
  rubyDuckDBConfig *get_struct_config(VALUE obj);
11
11
 
12
- void rbduckdb_init_duckdb_config(void);
12
+ void rbduckdb_init_config(void);
13
13
 
14
14
  #endif
@@ -231,7 +231,7 @@ static VALUE connection__register_scalar_function(VALUE self, VALUE scalar_funct
231
231
  duckdb_state state;
232
232
 
233
233
  ctxcon = rbduckdb_get_struct_connection(self);
234
- ctxsf = get_struct_scalar_function(scalar_function);
234
+ ctxsf = rbduckdb_get_struct_scalar_function(scalar_function);
235
235
 
236
236
  state = duckdb_register_scalar_function(ctxcon->con, ctxsf->scalar_function);
237
237
 
@@ -252,7 +252,7 @@ static VALUE connection__register_scalar_function_set(VALUE self, VALUE scalar_f
252
252
  duckdb_state state;
253
253
 
254
254
  ctxcon = rbduckdb_get_struct_connection(self);
255
- ctxsfs = get_struct_scalar_function_set(scalar_function_set);
255
+ ctxsfs = rbduckdb_get_struct_scalar_function_set(scalar_function_set);
256
256
 
257
257
  state = duckdb_register_scalar_function_set(ctxcon->con, ctxsfs->scalar_function_set);
258
258
 
@@ -314,7 +314,7 @@ static VALUE connection__register_table_function(VALUE self, VALUE table_functio
314
314
  duckdb_state state;
315
315
 
316
316
  ctxcon = rbduckdb_get_struct_connection(self);
317
- ctxtf = get_struct_table_function(table_function);
317
+ ctxtf = rbduckdb_get_struct_table_function(table_function);
318
318
 
319
319
  state = duckdb_register_table_function(ctxcon->con, ctxtf->table_function);
320
320
 
@@ -19,6 +19,7 @@ extern ID id__decimal_to_unscaled;
19
19
  VALUE rbduckdb_uuid_to_ruby(duckdb_hugeint h);
20
20
  VALUE rbduckdb_uuid_uhugeint_to_ruby(duckdb_uhugeint h);
21
21
  void rbduckdb_uuid_str_to_hugeint(VALUE uuid_str, duckdb_hugeint *out);
22
+ void rbduckdb_uuid_str_to_uhugeint(VALUE uuid_str, duckdb_uhugeint *out);
22
23
  VALUE rbduckdb_interval_to_ruby(duckdb_interval i);
23
24
  VALUE rbduckdb_hugeint_to_ruby(duckdb_hugeint h);
24
25
  VALUE rbduckdb_uhugeint_to_ruby(duckdb_uhugeint h);