red-arrow-duckdb 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/doc/text/news.md +26 -1
- data/ext/arrow-duckdb/arrow-duckdb-registration.cpp +57 -32
- data/ext/arrow-duckdb/arrow-duckdb.cpp +8 -10
- data/ext/arrow-duckdb/extconf.rb +5 -1
- data/lib/arrow-duckdb/version.rb +2 -2
- metadata +3 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63fb3f7af8e84c386b1dcec7309b967d507ec85a65355471167c15426a5cf2f3
|
4
|
+
data.tar.gz: 9ae59ace06c47648b5fd4fa14723773de03f80c1f51f121e1b8fa3b225d1c56a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b63175c712cce70c9a48f2becb6581ff05ac5d153257bd740a178626222508f3f74c5bdab9e3618bc977e0b605fc79aa181c81cce8cfeaa33671d2ccf07bf763
|
7
|
+
data.tar.gz: 350c3eb4925cc5bd76af437c977f0cc8959c3ed91fcb4f119cb85c1c6f43a8709cb166dd993e53e9088edd4747b5950b875f0cc3548b03e512813c43b023e207
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,29 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.0.3 - 2024-10-11
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for DuckDB 1.1.0
|
8
|
+
* GH-4
|
9
|
+
* Patch by Sten Larsson
|
10
|
+
|
11
|
+
### Thanks
|
12
|
+
|
13
|
+
* Sten Larsson
|
14
|
+
|
15
|
+
## 1.0.2 - 2022-03-06
|
16
|
+
|
17
|
+
### Improvements
|
18
|
+
|
19
|
+
* Added support for timestamp value pushdown.
|
20
|
+
[GitHub#2](https://github.com/red-data-tools/red-arrow-duckdb/issues/2)
|
21
|
+
[Reported by Torsten Sprenger]
|
22
|
+
|
23
|
+
### Thanks
|
24
|
+
|
25
|
+
* Torsten Sprenger
|
26
|
+
|
3
27
|
## 1.0.1 - 2021-11-06
|
4
28
|
|
5
29
|
### Improvements
|
@@ -7,7 +31,8 @@
|
|
7
31
|
* macOS: Don't require specifying `PKG_CONFIG_PATH` on install such
|
8
32
|
as `PKG_CONFIG_PATH=$(brew --prefix openssl)/lib/pkgconfig gem
|
9
33
|
install red-arrow-duckdb`.
|
10
|
-
[GitHub#1]
|
34
|
+
[GitHub#1](https://github.com/red-data-tools/red-arrow-duckdb/issues/2)
|
35
|
+
[Reported by Konstantin Ilchenko]
|
11
36
|
|
12
37
|
### Thanks
|
13
38
|
|
@@ -24,7 +24,8 @@
|
|
24
24
|
#include <duckdb.hpp>
|
25
25
|
#ifndef DUCKDB_AMALGAMATION
|
26
26
|
# include <duckdb.h>
|
27
|
-
# include <duckdb/common/arrow_wrapper.hpp>
|
27
|
+
# include <duckdb/common/arrow/arrow_wrapper.hpp>
|
28
|
+
# include <duckdb/function/table/arrow.hpp>
|
28
29
|
# include <duckdb/function/table_function.hpp>
|
29
30
|
# include <duckdb/main/connection.hpp>
|
30
31
|
# include <duckdb/planner/filter/conjunction_filter.hpp>
|
@@ -35,6 +36,22 @@
|
|
35
36
|
#include "arrow-duckdb-registration.hpp"
|
36
37
|
|
37
38
|
namespace {
|
39
|
+
std::shared_ptr<arrow::Scalar>
|
40
|
+
convert_constant_timestamp(duckdb::Value &value, arrow::TimeUnit::type unit)
|
41
|
+
{
|
42
|
+
auto scalar_result =
|
43
|
+
arrow::MakeScalar(arrow::timestamp(unit), value.GetValue<int64_t>());
|
44
|
+
if (!scalar_result.ok()) {
|
45
|
+
throw duckdb::InvalidInputException(
|
46
|
+
"[arrow][filter][pushdown][%s] "
|
47
|
+
"failed to convert to Apache Arrow scalar: %s: <%s>",
|
48
|
+
value.type().ToString(),
|
49
|
+
scalar_result.status().ToString(),
|
50
|
+
value.ToString());
|
51
|
+
}
|
52
|
+
return *scalar_result;
|
53
|
+
}
|
54
|
+
|
38
55
|
std::shared_ptr<arrow::Scalar>
|
39
56
|
convert_constant(duckdb::Value &value)
|
40
57
|
{
|
@@ -55,9 +72,14 @@ namespace {
|
|
55
72
|
// return arrow::MakeScalar(arrow::date32(), value.GetValue<int32_t>());
|
56
73
|
// case duckdb::LogicalTypeId::TIME:
|
57
74
|
// return arrow::MakeScalar(arrow::time64(), value.GetValue<int64_t>());
|
58
|
-
|
59
|
-
|
60
|
-
|
75
|
+
case duckdb::LogicalTypeId::TIMESTAMP_SEC:
|
76
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::SECOND);
|
77
|
+
case duckdb::LogicalTypeId::TIMESTAMP_MS:
|
78
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::MILLI);
|
79
|
+
case duckdb::LogicalTypeId::TIMESTAMP:
|
80
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::MICRO);
|
81
|
+
case duckdb::LogicalTypeId::TIMESTAMP_NS:
|
82
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::NANO);
|
61
83
|
case duckdb::LogicalTypeId::UTINYINT:
|
62
84
|
return arrow::MakeScalar(value.GetValue<uint8_t>());
|
63
85
|
case duckdb::LogicalTypeId::USMALLINT:
|
@@ -75,7 +97,7 @@ namespace {
|
|
75
97
|
// case LogicalTypeId::DECIMAL:
|
76
98
|
default:
|
77
99
|
throw duckdb::NotImplementedException(
|
78
|
-
"[arrow][filter][pushdown] not implemented value type
|
100
|
+
"[arrow][filter][pushdown][%s] not implemented value type",
|
79
101
|
value.type().ToString());
|
80
102
|
}
|
81
103
|
}
|
@@ -141,14 +163,11 @@ namespace {
|
|
141
163
|
}
|
142
164
|
|
143
165
|
arrow::compute::Expression
|
144
|
-
convert_filters(
|
145
|
-
idx_t,
|
146
|
-
std::unique_ptr<duckdb::TableFilter>
|
147
|
-
> &filters,
|
166
|
+
convert_filters(duckdb::TableFilterSet *filter_set,
|
148
167
|
std::unordered_map<idx_t, std::string> &column_names)
|
149
168
|
{
|
150
169
|
std::vector<arrow::compute::Expression> expressions;
|
151
|
-
for (auto it = filters.begin(); it != filters.end(); ++it) {
|
170
|
+
for (auto it = filter_set->filters.begin(); it != filter_set->filters.end(); ++it) {
|
152
171
|
expressions.emplace_back(
|
153
172
|
std::move(convert_filter(it->second.get(), column_names[it->first])));
|
154
173
|
}
|
@@ -157,11 +176,7 @@ namespace {
|
|
157
176
|
|
158
177
|
arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
|
159
178
|
arrow_table_produce_internal(uintptr_t data,
|
160
|
-
|
161
|
-
std::unordered_map<idx_t, std::string>,
|
162
|
-
std::vector<std::string>
|
163
|
-
> &project_columns,
|
164
|
-
duckdb::TableFilterCollection *filters)
|
179
|
+
duckdb::ArrowStreamParameters &parameters)
|
165
180
|
{
|
166
181
|
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
167
182
|
auto arrow_table = garrow_table_get_raw(garrow_table);
|
@@ -169,20 +184,22 @@ namespace {
|
|
169
184
|
std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
|
170
185
|
ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan());
|
171
186
|
bool have_filter =
|
172
|
-
filters &&
|
173
|
-
filters->
|
174
|
-
!filters->table_filters->filters.empty();
|
187
|
+
parameters.filters &&
|
188
|
+
!parameters.filters->filters.empty();
|
175
189
|
if (have_filter) {
|
176
190
|
ARROW_RETURN_NOT_OK(
|
177
|
-
scanner_builder->Filter(
|
178
|
-
|
191
|
+
scanner_builder->Filter(
|
192
|
+
convert_filters(parameters.filters,
|
193
|
+
parameters.projected_columns.projection_map)));
|
179
194
|
}
|
180
|
-
if (!
|
181
|
-
ARROW_RETURN_NOT_OK(
|
195
|
+
if (!parameters.projected_columns.columns.empty()) {
|
196
|
+
ARROW_RETURN_NOT_OK(
|
197
|
+
scanner_builder->Project(
|
198
|
+
parameters.projected_columns.columns));
|
182
199
|
}
|
183
200
|
ARROW_ASSIGN_OR_RAISE(auto scanner, scanner_builder->Finish());
|
184
201
|
ARROW_ASSIGN_OR_RAISE(auto reader, scanner->ToRecordBatchReader());
|
185
|
-
auto stream_wrapper = duckdb::
|
202
|
+
auto stream_wrapper = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
|
186
203
|
ARROW_RETURN_NOT_OK(
|
187
204
|
arrow::ExportRecordBatchReader(reader,
|
188
205
|
&(stream_wrapper->arrow_array_stream)));
|
@@ -191,14 +208,9 @@ namespace {
|
|
191
208
|
|
192
209
|
std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
|
193
210
|
arrow_table_produce(uintptr_t data,
|
194
|
-
|
195
|
-
std::unordered_map<idx_t, std::string>,
|
196
|
-
std::vector<std::string>
|
197
|
-
> &project_columns,
|
198
|
-
duckdb::TableFilterCollection *filters)
|
211
|
+
duckdb::ArrowStreamParameters &parameters)
|
199
212
|
{
|
200
|
-
auto stream_wrapper_result =
|
201
|
-
arrow_table_produce_internal(data, project_columns, filters);
|
213
|
+
auto stream_wrapper_result = arrow_table_produce_internal(data, parameters);
|
202
214
|
if (!stream_wrapper_result.ok()) {
|
203
215
|
throw std::runtime_error(
|
204
216
|
std::string("[arrow][produce] failed to produce: ") +
|
@@ -206,6 +218,20 @@ namespace {
|
|
206
218
|
}
|
207
219
|
return std::move(*stream_wrapper_result);
|
208
220
|
}
|
221
|
+
|
222
|
+
void
|
223
|
+
arrow_table_get_schema(uintptr_t data, duckdb::ArrowSchemaWrapper &schema)
|
224
|
+
{
|
225
|
+
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
226
|
+
auto arrow_table = garrow_table_get_raw(garrow_table);
|
227
|
+
auto export_schema_status = arrow::ExportSchema(*(arrow_table->schema()),
|
228
|
+
reinterpret_cast<ArrowSchema *>(&schema));
|
229
|
+
if (!export_schema_status.ok()) {
|
230
|
+
throw std::runtime_error(
|
231
|
+
std::string("[arrow][get_schema] failed to export schema: ") +
|
232
|
+
export_schema_status.ToString());
|
233
|
+
}
|
234
|
+
}
|
209
235
|
}
|
210
236
|
|
211
237
|
namespace arrow_duckdb {
|
@@ -224,14 +250,13 @@ namespace arrow_duckdb {
|
|
224
250
|
{
|
225
251
|
auto c_name = StringValueCStr(name);
|
226
252
|
auto garrow_table = RVAL2GOBJ(arrow_table);
|
227
|
-
const idx_t rows_per_tuple = 1000000;
|
228
253
|
reinterpret_cast<duckdb::Connection *>(connection)
|
229
254
|
->TableFunction(
|
230
255
|
"arrow_scan",
|
231
256
|
{
|
232
257
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(garrow_table)),
|
233
258
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_produce)),
|
234
|
-
duckdb::Value::
|
259
|
+
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_get_schema)),
|
235
260
|
})
|
236
261
|
->CreateView(c_name, true, true);
|
237
262
|
}
|
@@ -20,7 +20,9 @@
|
|
20
20
|
|
21
21
|
#include <rbgobject.h>
|
22
22
|
|
23
|
+
extern "C" {
|
23
24
|
#include <ruby-duckdb.h>
|
25
|
+
}
|
24
26
|
|
25
27
|
#include "arrow-duckdb-registration.hpp"
|
26
28
|
|
@@ -99,7 +101,7 @@ namespace {
|
|
99
101
|
result_ensure_gschema(Result *result)
|
100
102
|
{
|
101
103
|
ArrowSchema c_abi_schema;
|
102
|
-
|
104
|
+
auto schema = reinterpret_cast<duckdb_arrow_schema>(&c_abi_schema);
|
103
105
|
auto state = duckdb_query_arrow_schema(result->arrow, &schema);
|
104
106
|
if (state == DuckDBError) {
|
105
107
|
free(result->error_message);
|
@@ -121,7 +123,7 @@ namespace {
|
|
121
123
|
result_fetch_internal(VALUE self, Result *result)
|
122
124
|
{
|
123
125
|
ArrowArray c_abi_array = {};
|
124
|
-
|
126
|
+
auto array = reinterpret_cast<duckdb_arrow_array>(&c_abi_array);
|
125
127
|
auto state = duckdb_query_arrow_array(result->arrow, &array);
|
126
128
|
if (state == DuckDBError) {
|
127
129
|
free(result->error_message);
|
@@ -219,8 +221,7 @@ namespace {
|
|
219
221
|
VALUE
|
220
222
|
query_sql_arrow(VALUE self, VALUE sql)
|
221
223
|
{
|
222
|
-
|
223
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
224
|
+
auto ctx = get_struct_connection(self);
|
224
225
|
|
225
226
|
if (!(ctx->con)) {
|
226
227
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -253,8 +254,7 @@ namespace {
|
|
253
254
|
VALUE
|
254
255
|
query_unregister_arrow(VALUE self, VALUE name)
|
255
256
|
{
|
256
|
-
|
257
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
257
|
+
auto ctx = get_struct_connection(self);
|
258
258
|
|
259
259
|
if (!(ctx->con)) {
|
260
260
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -293,8 +293,7 @@ namespace {
|
|
293
293
|
VALUE
|
294
294
|
query_register_arrow(VALUE self, VALUE name, VALUE arrow_table)
|
295
295
|
{
|
296
|
-
|
297
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
296
|
+
auto ctx = get_struct_connection(self);
|
298
297
|
|
299
298
|
if (!(ctx->con)) {
|
300
299
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -329,8 +328,7 @@ namespace {
|
|
329
328
|
VALUE
|
330
329
|
prepared_statement_execute_arrow(VALUE self)
|
331
330
|
{
|
332
|
-
|
333
|
-
Data_Get_Struct(self, rubyDuckDBPreparedStatement, ctx);
|
331
|
+
auto ctx = get_struct_prepared_statement(self);
|
334
332
|
|
335
333
|
ID id_new;
|
336
334
|
CONST_ID(id_new, "new");
|
data/ext/arrow-duckdb/extconf.rb
CHANGED
@@ -16,7 +16,7 @@ require "extpp"
|
|
16
16
|
require "mkmf-gnome"
|
17
17
|
require "native-package-installer"
|
18
18
|
|
19
|
-
checking_for(checking_message("Homebrew")) do
|
19
|
+
homebrew = checking_for(checking_message("Homebrew")) do
|
20
20
|
case NativePackageInstaller::Platform.detect
|
21
21
|
when NativePackageInstaller::Platform::Homebrew
|
22
22
|
openssl_prefix = `brew --prefix openssl`.chomp
|
@@ -40,6 +40,10 @@ unless have_library("duckdb")
|
|
40
40
|
install_missing_native_package(debian: "libduckdb-dev",
|
41
41
|
redhat: "duckdb-devel",
|
42
42
|
homebrew: "duckdb") or exit(false)
|
43
|
+
if homebrew
|
44
|
+
$INCFLAGS << " -I" << File.join(`brew --prefix duckdb`.chomp, "include")
|
45
|
+
$LIBPATH |= [File.join(`brew --prefix duckdb`.chomp, "lib")]
|
46
|
+
end
|
43
47
|
have_library("duckdb") or exit(false)
|
44
48
|
end
|
45
49
|
|
data/lib/arrow-duckdb/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
1
|
+
# Copyright 2021-2022 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,5 +13,5 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
module ArrowDuckDB
|
16
|
-
VERSION = "1.0.1"
|
16
|
+
VERSION = "1.0.3"
|
17
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow-duckdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sutou Kouhei
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-10-10 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: duckdb
|
@@ -104,7 +103,6 @@ homepage: https://github.com/red-data-tools/red-arrow-duckdb
|
|
104
103
|
licenses:
|
105
104
|
- Apache-2.0
|
106
105
|
metadata: {}
|
107
|
-
post_install_message:
|
108
106
|
rdoc_options: []
|
109
107
|
require_paths:
|
110
108
|
- lib
|
@@ -119,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
117
|
- !ruby/object:Gem::Version
|
120
118
|
version: '0'
|
121
119
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
123
|
-
signing_key:
|
120
|
+
rubygems_version: 3.6.0.dev
|
124
121
|
specification_version: 4
|
125
122
|
summary: Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
|
126
123
|
test_files: []
|