red-arrow-duckdb 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +12 -0
- data/ext/arrow-duckdb/arrow-duckdb-registration.cpp +32 -28
- data/ext/arrow-duckdb/arrow-duckdb.cpp +8 -10
- data/ext/arrow-duckdb/extconf.rb +5 -1
- data/lib/arrow-duckdb/version.rb +2 -2
- metadata +3 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63fb3f7af8e84c386b1dcec7309b967d507ec85a65355471167c15426a5cf2f3
|
4
|
+
data.tar.gz: 9ae59ace06c47648b5fd4fa14723773de03f80c1f51f121e1b8fa3b225d1c56a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b63175c712cce70c9a48f2becb6581ff05ac5d153257bd740a178626222508f3f74c5bdab9e3618bc977e0b605fc79aa181c81cce8cfeaa33671d2ccf07bf763
|
7
|
+
data.tar.gz: 350c3eb4925cc5bd76af437c977f0cc8959c3ed91fcb4f119cb85c1c6f43a8709cb166dd993e53e9088edd4747b5950b875f0cc3548b03e512813c43b023e207
|
data/doc/text/news.md
CHANGED
@@ -24,7 +24,8 @@
|
|
24
24
|
#include <duckdb.hpp>
|
25
25
|
#ifndef DUCKDB_AMALGAMATION
|
26
26
|
# include <duckdb.h>
|
27
|
-
# include <duckdb/common/arrow_wrapper.hpp>
|
27
|
+
# include <duckdb/common/arrow/arrow_wrapper.hpp>
|
28
|
+
# include <duckdb/function/table/arrow.hpp>
|
28
29
|
# include <duckdb/function/table_function.hpp>
|
29
30
|
# include <duckdb/main/connection.hpp>
|
30
31
|
# include <duckdb/planner/filter/conjunction_filter.hpp>
|
@@ -162,14 +163,11 @@ namespace {
|
|
162
163
|
}
|
163
164
|
|
164
165
|
arrow::compute::Expression
|
165
|
-
convert_filters(
|
166
|
-
idx_t,
|
167
|
-
std::unique_ptr<duckdb::TableFilter>
|
168
|
-
> &filters,
|
166
|
+
convert_filters(duckdb::TableFilterSet *filter_set,
|
169
167
|
std::unordered_map<idx_t, std::string> &column_names)
|
170
168
|
{
|
171
169
|
std::vector<arrow::compute::Expression> expressions;
|
172
|
-
for (auto it = filters.begin(); it != filters.end(); ++it) {
|
170
|
+
for (auto it = filter_set->filters.begin(); it != filter_set->filters.end(); ++it) {
|
173
171
|
expressions.emplace_back(
|
174
172
|
std::move(convert_filter(it->second.get(), column_names[it->first])));
|
175
173
|
}
|
@@ -178,11 +176,7 @@ namespace {
|
|
178
176
|
|
179
177
|
arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
|
180
178
|
arrow_table_produce_internal(uintptr_t data,
|
181
|
-
|
182
|
-
std::unordered_map<idx_t, std::string>,
|
183
|
-
std::vector<std::string>
|
184
|
-
> &project_columns,
|
185
|
-
duckdb::TableFilterCollection *filters)
|
179
|
+
duckdb::ArrowStreamParameters &parameters)
|
186
180
|
{
|
187
181
|
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
188
182
|
auto arrow_table = garrow_table_get_raw(garrow_table);
|
@@ -190,20 +184,22 @@ namespace {
|
|
190
184
|
std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
|
191
185
|
ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan());
|
192
186
|
bool have_filter =
|
193
|
-
filters &&
|
194
|
-
filters->
|
195
|
-
!filters->table_filters->filters.empty();
|
187
|
+
parameters.filters &&
|
188
|
+
!parameters.filters->filters.empty();
|
196
189
|
if (have_filter) {
|
197
190
|
ARROW_RETURN_NOT_OK(
|
198
|
-
scanner_builder->Filter(
|
199
|
-
|
191
|
+
scanner_builder->Filter(
|
192
|
+
convert_filters(parameters.filters,
|
193
|
+
parameters.projected_columns.projection_map)));
|
200
194
|
}
|
201
|
-
if (!
|
202
|
-
ARROW_RETURN_NOT_OK(
|
195
|
+
if (!parameters.projected_columns.columns.empty()) {
|
196
|
+
ARROW_RETURN_NOT_OK(
|
197
|
+
scanner_builder->Project(
|
198
|
+
parameters.projected_columns.columns));
|
203
199
|
}
|
204
200
|
ARROW_ASSIGN_OR_RAISE(auto scanner, scanner_builder->Finish());
|
205
201
|
ARROW_ASSIGN_OR_RAISE(auto reader, scanner->ToRecordBatchReader());
|
206
|
-
auto stream_wrapper = duckdb::
|
202
|
+
auto stream_wrapper = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
|
207
203
|
ARROW_RETURN_NOT_OK(
|
208
204
|
arrow::ExportRecordBatchReader(reader,
|
209
205
|
&(stream_wrapper->arrow_array_stream)));
|
@@ -212,14 +208,9 @@ namespace {
|
|
212
208
|
|
213
209
|
std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
|
214
210
|
arrow_table_produce(uintptr_t data,
|
215
|
-
|
216
|
-
std::unordered_map<idx_t, std::string>,
|
217
|
-
std::vector<std::string>
|
218
|
-
> &project_columns,
|
219
|
-
duckdb::TableFilterCollection *filters)
|
211
|
+
duckdb::ArrowStreamParameters &parameters)
|
220
212
|
{
|
221
|
-
auto stream_wrapper_result =
|
222
|
-
arrow_table_produce_internal(data, project_columns, filters);
|
213
|
+
auto stream_wrapper_result = arrow_table_produce_internal(data, parameters);
|
223
214
|
if (!stream_wrapper_result.ok()) {
|
224
215
|
throw std::runtime_error(
|
225
216
|
std::string("[arrow][produce] failed to produce: ") +
|
@@ -227,6 +218,20 @@ namespace {
|
|
227
218
|
}
|
228
219
|
return std::move(*stream_wrapper_result);
|
229
220
|
}
|
221
|
+
|
222
|
+
void
|
223
|
+
arrow_table_get_schema(uintptr_t data, duckdb::ArrowSchemaWrapper &schema)
|
224
|
+
{
|
225
|
+
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
226
|
+
auto arrow_table = garrow_table_get_raw(garrow_table);
|
227
|
+
auto export_schema_status = arrow::ExportSchema(*(arrow_table->schema()),
|
228
|
+
reinterpret_cast<ArrowSchema *>(&schema));
|
229
|
+
if (!export_schema_status.ok()) {
|
230
|
+
throw std::runtime_error(
|
231
|
+
std::string("[arrow][get_schema] failed to export schema: ") +
|
232
|
+
export_schema_status.ToString());
|
233
|
+
}
|
234
|
+
}
|
230
235
|
}
|
231
236
|
|
232
237
|
namespace arrow_duckdb {
|
@@ -245,14 +250,13 @@ namespace arrow_duckdb {
|
|
245
250
|
{
|
246
251
|
auto c_name = StringValueCStr(name);
|
247
252
|
auto garrow_table = RVAL2GOBJ(arrow_table);
|
248
|
-
const idx_t rows_per_tuple = 1000000;
|
249
253
|
reinterpret_cast<duckdb::Connection *>(connection)
|
250
254
|
->TableFunction(
|
251
255
|
"arrow_scan",
|
252
256
|
{
|
253
257
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(garrow_table)),
|
254
258
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_produce)),
|
255
|
-
duckdb::Value::
|
259
|
+
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_get_schema)),
|
256
260
|
})
|
257
261
|
->CreateView(c_name, true, true);
|
258
262
|
}
|
@@ -20,7 +20,9 @@
|
|
20
20
|
|
21
21
|
#include <rbgobject.h>
|
22
22
|
|
23
|
+
extern "C" {
|
23
24
|
#include <ruby-duckdb.h>
|
25
|
+
}
|
24
26
|
|
25
27
|
#include "arrow-duckdb-registration.hpp"
|
26
28
|
|
@@ -99,7 +101,7 @@ namespace {
|
|
99
101
|
result_ensure_gschema(Result *result)
|
100
102
|
{
|
101
103
|
ArrowSchema c_abi_schema;
|
102
|
-
|
104
|
+
auto schema = reinterpret_cast<duckdb_arrow_schema>(&c_abi_schema);
|
103
105
|
auto state = duckdb_query_arrow_schema(result->arrow, &schema);
|
104
106
|
if (state == DuckDBError) {
|
105
107
|
free(result->error_message);
|
@@ -121,7 +123,7 @@ namespace {
|
|
121
123
|
result_fetch_internal(VALUE self, Result *result)
|
122
124
|
{
|
123
125
|
ArrowArray c_abi_array = {};
|
124
|
-
|
126
|
+
auto array = reinterpret_cast<duckdb_arrow_array>(&c_abi_array);
|
125
127
|
auto state = duckdb_query_arrow_array(result->arrow, &array);
|
126
128
|
if (state == DuckDBError) {
|
127
129
|
free(result->error_message);
|
@@ -219,8 +221,7 @@ namespace {
|
|
219
221
|
VALUE
|
220
222
|
query_sql_arrow(VALUE self, VALUE sql)
|
221
223
|
{
|
222
|
-
|
223
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
224
|
+
auto ctx = get_struct_connection(self);
|
224
225
|
|
225
226
|
if (!(ctx->con)) {
|
226
227
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -253,8 +254,7 @@ namespace {
|
|
253
254
|
VALUE
|
254
255
|
query_unregister_arrow(VALUE self, VALUE name)
|
255
256
|
{
|
256
|
-
|
257
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
257
|
+
auto ctx = get_struct_connection(self);
|
258
258
|
|
259
259
|
if (!(ctx->con)) {
|
260
260
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -293,8 +293,7 @@ namespace {
|
|
293
293
|
VALUE
|
294
294
|
query_register_arrow(VALUE self, VALUE name, VALUE arrow_table)
|
295
295
|
{
|
296
|
-
|
297
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
296
|
+
auto ctx = get_struct_connection(self);
|
298
297
|
|
299
298
|
if (!(ctx->con)) {
|
300
299
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -329,8 +328,7 @@ namespace {
|
|
329
328
|
VALUE
|
330
329
|
prepared_statement_execute_arrow(VALUE self)
|
331
330
|
{
|
332
|
-
|
333
|
-
Data_Get_Struct(self, rubyDuckDBPreparedStatement, ctx);
|
331
|
+
auto ctx = get_struct_prepared_statement(self);
|
334
332
|
|
335
333
|
ID id_new;
|
336
334
|
CONST_ID(id_new, "new");
|
data/ext/arrow-duckdb/extconf.rb
CHANGED
@@ -16,7 +16,7 @@ require "extpp"
|
|
16
16
|
require "mkmf-gnome"
|
17
17
|
require "native-package-installer"
|
18
18
|
|
19
|
-
checking_for(checking_message("Homebrew")) do
|
19
|
+
homebrew = checking_for(checking_message("Homebrew")) do
|
20
20
|
case NativePackageInstaller::Platform.detect
|
21
21
|
when NativePackageInstaller::Platform::Homebrew
|
22
22
|
openssl_prefix = `brew --prefix openssl`.chomp
|
@@ -40,6 +40,10 @@ unless have_library("duckdb")
|
|
40
40
|
install_missing_native_package(debian: "libduckdb-dev",
|
41
41
|
redhat: "duckdb-devel",
|
42
42
|
homebrew: "duckdb") or exit(false)
|
43
|
+
if homebrew
|
44
|
+
$INCFLAGS << " -I" << File.join(`brew --prefix duckdb`.chomp, "include")
|
45
|
+
$LIBPATH |= [File.join(`brew --prefix duckdb`.chomp, "lib")]
|
46
|
+
end
|
43
47
|
have_library("duckdb") or exit(false)
|
44
48
|
end
|
45
49
|
|
data/lib/arrow-duckdb/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
1
|
+
# Copyright 2021-2022 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,5 +13,5 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
module ArrowDuckDB
|
16
|
-
VERSION = "1.0.2"
|
16
|
+
VERSION = "1.0.3"
|
17
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow-duckdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sutou Kouhei
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-10-10 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: duckdb
|
@@ -104,7 +103,6 @@ homepage: https://github.com/red-data-tools/red-arrow-duckdb
|
|
104
103
|
licenses:
|
105
104
|
- Apache-2.0
|
106
105
|
metadata: {}
|
107
|
-
post_install_message:
|
108
106
|
rdoc_options: []
|
109
107
|
require_paths:
|
110
108
|
- lib
|
@@ -119,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
117
|
- !ruby/object:Gem::Version
|
120
118
|
version: '0'
|
121
119
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
123
|
-
signing_key:
|
120
|
+
rubygems_version: 3.6.0.dev
|
124
121
|
specification_version: 4
|
125
122
|
summary: Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
|
126
123
|
test_files: []
|