red-arrow-duckdb 1.0.1 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc/text/news.md +26 -1
- data/ext/arrow-duckdb/arrow-duckdb-registration.cpp +57 -32
- data/ext/arrow-duckdb/arrow-duckdb.cpp +8 -10
- data/ext/arrow-duckdb/extconf.rb +5 -1
- data/lib/arrow-duckdb/version.rb +2 -2
- metadata +3 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63fb3f7af8e84c386b1dcec7309b967d507ec85a65355471167c15426a5cf2f3
|
4
|
+
data.tar.gz: 9ae59ace06c47648b5fd4fa14723773de03f80c1f51f121e1b8fa3b225d1c56a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b63175c712cce70c9a48f2becb6581ff05ac5d153257bd740a178626222508f3f74c5bdab9e3618bc977e0b605fc79aa181c81cce8cfeaa33671d2ccf07bf763
|
7
|
+
data.tar.gz: 350c3eb4925cc5bd76af437c977f0cc8959c3ed91fcb4f119cb85c1c6f43a8709cb166dd993e53e9088edd4747b5950b875f0cc3548b03e512813c43b023e207
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,29 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 1.0.3 - 2024-10-11
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for DuckDB 1.1.0
|
8
|
+
* GH-4
|
9
|
+
* Patch by Sten Larsson
|
10
|
+
|
11
|
+
### Thanks
|
12
|
+
|
13
|
+
* Sten Larsson
|
14
|
+
|
15
|
+
## 1.0.2 - 2022-03-06
|
16
|
+
|
17
|
+
### Improvements
|
18
|
+
|
19
|
+
* Added support for timestamp value pushdown.
|
20
|
+
[GitHub#2](https://github.com/red-data-tools/red-arrow-duckdb/issues/2)
|
21
|
+
[Reported by Torsten Sprenger]
|
22
|
+
|
23
|
+
### Thanks
|
24
|
+
|
25
|
+
* Torsten Sprenger
|
26
|
+
|
3
27
|
## 1.0.1 - 2021-11-06
|
4
28
|
|
5
29
|
### Improvements
|
@@ -7,7 +31,8 @@
|
|
7
31
|
* macOS: Don't require specifying `PKG_CONFIG_PATH` on install such
|
8
32
|
as `PKG_CONFIG_PATH=$(brew --prefix openssl)/lib/pkgconfig gem
|
9
33
|
install red-arrow-duckdb`.
|
10
|
-
[GitHub#1]
|
34
|
+
[GitHub#1](https://github.com/red-data-tools/red-arrow-duckdb/issues/1)
|
35
|
+
[Reported by Konstantin Ilchenko]
|
11
36
|
|
12
37
|
### Thanks
|
13
38
|
|
@@ -24,7 +24,8 @@
|
|
24
24
|
#include <duckdb.hpp>
|
25
25
|
#ifndef DUCKDB_AMALGAMATION
|
26
26
|
# include <duckdb.h>
|
27
|
-
# include <duckdb/common/arrow_wrapper.hpp>
|
27
|
+
# include <duckdb/common/arrow/arrow_wrapper.hpp>
|
28
|
+
# include <duckdb/function/table/arrow.hpp>
|
28
29
|
# include <duckdb/function/table_function.hpp>
|
29
30
|
# include <duckdb/main/connection.hpp>
|
30
31
|
# include <duckdb/planner/filter/conjunction_filter.hpp>
|
@@ -35,6 +36,22 @@
|
|
35
36
|
#include "arrow-duckdb-registration.hpp"
|
36
37
|
|
37
38
|
namespace {
|
39
|
+
std::shared_ptr<arrow::Scalar>
|
40
|
+
convert_constant_timestamp(duckdb::Value &value, arrow::TimeUnit::type unit)
|
41
|
+
{
|
42
|
+
auto scalar_result =
|
43
|
+
arrow::MakeScalar(arrow::timestamp(unit), value.GetValue<int64_t>());
|
44
|
+
if (!scalar_result.ok()) {
|
45
|
+
throw duckdb::InvalidInputException(
|
46
|
+
"[arrow][filter][pushdown][%s] "
|
47
|
+
"failed to convert to Apache Arrow scalar: %s: <%s>",
|
48
|
+
value.type().ToString(),
|
49
|
+
scalar_result.status().ToString(),
|
50
|
+
value.ToString());
|
51
|
+
}
|
52
|
+
return *scalar_result;
|
53
|
+
}
|
54
|
+
|
38
55
|
std::shared_ptr<arrow::Scalar>
|
39
56
|
convert_constant(duckdb::Value &value)
|
40
57
|
{
|
@@ -55,9 +72,14 @@ namespace {
|
|
55
72
|
// return arrow::MakeScalar(arrow::date32(), value.GetValue<int32_t>());
|
56
73
|
// case duckdb::LogicalTypeId::TIME:
|
57
74
|
// return arrow::MakeScalar(arrow::time64(), value.GetValue<int64_t>());
|
58
|
-
|
59
|
-
|
60
|
-
|
75
|
+
case duckdb::LogicalTypeId::TIMESTAMP_SEC:
|
76
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::SECOND);
|
77
|
+
case duckdb::LogicalTypeId::TIMESTAMP_MS:
|
78
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::MILLI);
|
79
|
+
case duckdb::LogicalTypeId::TIMESTAMP:
|
80
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::MICRO);
|
81
|
+
case duckdb::LogicalTypeId::TIMESTAMP_NS:
|
82
|
+
return convert_constant_timestamp(value, arrow::TimeUnit::NANO);
|
61
83
|
case duckdb::LogicalTypeId::UTINYINT:
|
62
84
|
return arrow::MakeScalar(value.GetValue<uint8_t>());
|
63
85
|
case duckdb::LogicalTypeId::USMALLINT:
|
@@ -75,7 +97,7 @@ namespace {
|
|
75
97
|
// case LogicalTypeId::DECIMAL:
|
76
98
|
default:
|
77
99
|
throw duckdb::NotImplementedException(
|
78
|
-
"[arrow][filter][pushdown] not implemented value type
|
100
|
+
"[arrow][filter][pushdown][%s] not implemented value type",
|
79
101
|
value.type().ToString());
|
80
102
|
}
|
81
103
|
}
|
@@ -141,14 +163,11 @@ namespace {
|
|
141
163
|
}
|
142
164
|
|
143
165
|
arrow::compute::Expression
|
144
|
-
convert_filters(
|
145
|
-
idx_t,
|
146
|
-
std::unique_ptr<duckdb::TableFilter>
|
147
|
-
> &filters,
|
166
|
+
convert_filters(duckdb::TableFilterSet *filter_set,
|
148
167
|
std::unordered_map<idx_t, std::string> &column_names)
|
149
168
|
{
|
150
169
|
std::vector<arrow::compute::Expression> expressions;
|
151
|
-
for (auto it = filters.begin(); it != filters.end(); ++it) {
|
170
|
+
for (auto it = filter_set->filters.begin(); it != filter_set->filters.end(); ++it) {
|
152
171
|
expressions.emplace_back(
|
153
172
|
std::move(convert_filter(it->second.get(), column_names[it->first])));
|
154
173
|
}
|
@@ -157,11 +176,7 @@ namespace {
|
|
157
176
|
|
158
177
|
arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
|
159
178
|
arrow_table_produce_internal(uintptr_t data,
|
160
|
-
|
161
|
-
std::unordered_map<idx_t, std::string>,
|
162
|
-
std::vector<std::string>
|
163
|
-
> &project_columns,
|
164
|
-
duckdb::TableFilterCollection *filters)
|
179
|
+
duckdb::ArrowStreamParameters &parameters)
|
165
180
|
{
|
166
181
|
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
167
182
|
auto arrow_table = garrow_table_get_raw(garrow_table);
|
@@ -169,20 +184,22 @@ namespace {
|
|
169
184
|
std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
|
170
185
|
ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan());
|
171
186
|
bool have_filter =
|
172
|
-
filters &&
|
173
|
-
filters->
|
174
|
-
!filters->table_filters->filters.empty();
|
187
|
+
parameters.filters &&
|
188
|
+
!parameters.filters->filters.empty();
|
175
189
|
if (have_filter) {
|
176
190
|
ARROW_RETURN_NOT_OK(
|
177
|
-
scanner_builder->Filter(
|
178
|
-
|
191
|
+
scanner_builder->Filter(
|
192
|
+
convert_filters(parameters.filters,
|
193
|
+
parameters.projected_columns.projection_map)));
|
179
194
|
}
|
180
|
-
if (!
|
181
|
-
ARROW_RETURN_NOT_OK(
|
195
|
+
if (!parameters.projected_columns.columns.empty()) {
|
196
|
+
ARROW_RETURN_NOT_OK(
|
197
|
+
scanner_builder->Project(
|
198
|
+
parameters.projected_columns.columns));
|
182
199
|
}
|
183
200
|
ARROW_ASSIGN_OR_RAISE(auto scanner, scanner_builder->Finish());
|
184
201
|
ARROW_ASSIGN_OR_RAISE(auto reader, scanner->ToRecordBatchReader());
|
185
|
-
auto stream_wrapper = duckdb::
|
202
|
+
auto stream_wrapper = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
|
186
203
|
ARROW_RETURN_NOT_OK(
|
187
204
|
arrow::ExportRecordBatchReader(reader,
|
188
205
|
&(stream_wrapper->arrow_array_stream)));
|
@@ -191,14 +208,9 @@ namespace {
|
|
191
208
|
|
192
209
|
std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
|
193
210
|
arrow_table_produce(uintptr_t data,
|
194
|
-
|
195
|
-
std::unordered_map<idx_t, std::string>,
|
196
|
-
std::vector<std::string>
|
197
|
-
> &project_columns,
|
198
|
-
duckdb::TableFilterCollection *filters)
|
211
|
+
duckdb::ArrowStreamParameters &parameters)
|
199
212
|
{
|
200
|
-
auto stream_wrapper_result =
|
201
|
-
arrow_table_produce_internal(data, project_columns, filters);
|
213
|
+
auto stream_wrapper_result = arrow_table_produce_internal(data, parameters);
|
202
214
|
if (!stream_wrapper_result.ok()) {
|
203
215
|
throw std::runtime_error(
|
204
216
|
std::string("[arrow][produce] failed to produce: ") +
|
@@ -206,6 +218,20 @@ namespace {
|
|
206
218
|
}
|
207
219
|
return std::move(*stream_wrapper_result);
|
208
220
|
}
|
221
|
+
|
222
|
+
void
|
223
|
+
arrow_table_get_schema(uintptr_t data, duckdb::ArrowSchemaWrapper &schema)
|
224
|
+
{
|
225
|
+
auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
|
226
|
+
auto arrow_table = garrow_table_get_raw(garrow_table);
|
227
|
+
auto export_schema_status = arrow::ExportSchema(*(arrow_table->schema()),
|
228
|
+
reinterpret_cast<ArrowSchema *>(&schema));
|
229
|
+
if (!export_schema_status.ok()) {
|
230
|
+
throw std::runtime_error(
|
231
|
+
std::string("[arrow][get_schema] failed to export schema: ") +
|
232
|
+
export_schema_status.ToString());
|
233
|
+
}
|
234
|
+
}
|
209
235
|
}
|
210
236
|
|
211
237
|
namespace arrow_duckdb {
|
@@ -224,14 +250,13 @@ namespace arrow_duckdb {
|
|
224
250
|
{
|
225
251
|
auto c_name = StringValueCStr(name);
|
226
252
|
auto garrow_table = RVAL2GOBJ(arrow_table);
|
227
|
-
const idx_t rows_per_tuple = 1000000;
|
228
253
|
reinterpret_cast<duckdb::Connection *>(connection)
|
229
254
|
->TableFunction(
|
230
255
|
"arrow_scan",
|
231
256
|
{
|
232
257
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(garrow_table)),
|
233
258
|
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_produce)),
|
234
|
-
duckdb::Value::
|
259
|
+
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_get_schema)),
|
235
260
|
})
|
236
261
|
->CreateView(c_name, true, true);
|
237
262
|
}
|
@@ -20,7 +20,9 @@
|
|
20
20
|
|
21
21
|
#include <rbgobject.h>
|
22
22
|
|
23
|
+
extern "C" {
|
23
24
|
#include <ruby-duckdb.h>
|
25
|
+
}
|
24
26
|
|
25
27
|
#include "arrow-duckdb-registration.hpp"
|
26
28
|
|
@@ -99,7 +101,7 @@ namespace {
|
|
99
101
|
result_ensure_gschema(Result *result)
|
100
102
|
{
|
101
103
|
ArrowSchema c_abi_schema;
|
102
|
-
|
104
|
+
auto schema = reinterpret_cast<duckdb_arrow_schema>(&c_abi_schema);
|
103
105
|
auto state = duckdb_query_arrow_schema(result->arrow, &schema);
|
104
106
|
if (state == DuckDBError) {
|
105
107
|
free(result->error_message);
|
@@ -121,7 +123,7 @@ namespace {
|
|
121
123
|
result_fetch_internal(VALUE self, Result *result)
|
122
124
|
{
|
123
125
|
ArrowArray c_abi_array = {};
|
124
|
-
|
126
|
+
auto array = reinterpret_cast<duckdb_arrow_array>(&c_abi_array);
|
125
127
|
auto state = duckdb_query_arrow_array(result->arrow, &array);
|
126
128
|
if (state == DuckDBError) {
|
127
129
|
free(result->error_message);
|
@@ -219,8 +221,7 @@ namespace {
|
|
219
221
|
VALUE
|
220
222
|
query_sql_arrow(VALUE self, VALUE sql)
|
221
223
|
{
|
222
|
-
|
223
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
224
|
+
auto ctx = get_struct_connection(self);
|
224
225
|
|
225
226
|
if (!(ctx->con)) {
|
226
227
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -253,8 +254,7 @@ namespace {
|
|
253
254
|
VALUE
|
254
255
|
query_unregister_arrow(VALUE self, VALUE name)
|
255
256
|
{
|
256
|
-
|
257
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
257
|
+
auto ctx = get_struct_connection(self);
|
258
258
|
|
259
259
|
if (!(ctx->con)) {
|
260
260
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -293,8 +293,7 @@ namespace {
|
|
293
293
|
VALUE
|
294
294
|
query_register_arrow(VALUE self, VALUE name, VALUE arrow_table)
|
295
295
|
{
|
296
|
-
|
297
|
-
Data_Get_Struct(self, rubyDuckDBConnection, ctx);
|
296
|
+
auto ctx = get_struct_connection(self);
|
298
297
|
|
299
298
|
if (!(ctx->con)) {
|
300
299
|
rb_raise(eDuckDBError, "Database connection closed");
|
@@ -329,8 +328,7 @@ namespace {
|
|
329
328
|
VALUE
|
330
329
|
prepared_statement_execute_arrow(VALUE self)
|
331
330
|
{
|
332
|
-
|
333
|
-
Data_Get_Struct(self, rubyDuckDBPreparedStatement, ctx);
|
331
|
+
auto ctx = get_struct_prepared_statement(self);
|
334
332
|
|
335
333
|
ID id_new;
|
336
334
|
CONST_ID(id_new, "new");
|
data/ext/arrow-duckdb/extconf.rb
CHANGED
@@ -16,7 +16,7 @@ require "extpp"
|
|
16
16
|
require "mkmf-gnome"
|
17
17
|
require "native-package-installer"
|
18
18
|
|
19
|
-
checking_for(checking_message("Homebrew")) do
|
19
|
+
homebrew = checking_for(checking_message("Homebrew")) do
|
20
20
|
case NativePackageInstaller::Platform.detect
|
21
21
|
when NativePackageInstaller::Platform::Homebrew
|
22
22
|
openssl_prefix = `brew --prefix openssl`.chomp
|
@@ -40,6 +40,10 @@ unless have_library("duckdb")
|
|
40
40
|
install_missing_native_package(debian: "libduckdb-dev",
|
41
41
|
redhat: "duckdb-devel",
|
42
42
|
homebrew: "duckdb") or exit(false)
|
43
|
+
if homebrew
|
44
|
+
$INCFLAGS << " -I" << File.join(`brew --prefix duckdb`.chomp, "include")
|
45
|
+
$LIBPATH |= [File.join(`brew --prefix duckdb`.chomp, "lib")]
|
46
|
+
end
|
43
47
|
have_library("duckdb") or exit(false)
|
44
48
|
end
|
45
49
|
|
data/lib/arrow-duckdb/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
1
|
+
# Copyright 2021-2022 Sutou Kouhei <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,5 +13,5 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
module ArrowDuckDB
|
16
|
-
VERSION = "1.0.1"
|
16
|
+
VERSION = "1.0.3"
|
17
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-arrow-duckdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sutou Kouhei
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-10-10 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: duckdb
|
@@ -104,7 +103,6 @@ homepage: https://github.com/red-data-tools/red-arrow-duckdb
|
|
104
103
|
licenses:
|
105
104
|
- Apache-2.0
|
106
105
|
metadata: {}
|
107
|
-
post_install_message:
|
108
106
|
rdoc_options: []
|
109
107
|
require_paths:
|
110
108
|
- lib
|
@@ -119,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
117
|
- !ruby/object:Gem::Version
|
120
118
|
version: '0'
|
121
119
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
123
|
-
signing_key:
|
120
|
+
rubygems_version: 3.6.0.dev
|
124
121
|
specification_version: 4
|
125
122
|
summary: Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
|
126
123
|
test_files: []
|