red-arrow-duckdb 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +111 -0
- data/doc/text/apache-2.0.txt +202 -0
- data/doc/text/news.md +5 -0
- data/ext/arrow-duckdb/arrow-duckdb-registration.cpp +238 -0
- data/ext/arrow-duckdb/arrow-duckdb-registration.hpp +27 -0
- data/ext/arrow-duckdb/arrow-duckdb.cpp +404 -0
- data/ext/arrow-duckdb/extconf.rb +33 -0
- data/lib/arrow-duckdb.rb +23 -0
- data/lib/arrow-duckdb/connection.rb +37 -0
- data/lib/arrow-duckdb/result.rb +21 -0
- data/lib/arrow-duckdb/version.rb +17 -0
- data/red-arrow-duckdb.gemspec +50 -0
- metadata +126 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d9abf186818144a8c4bcd7520f9d95c13f8dc1f1aec94b1a14317e3f904f8cbb
|
4
|
+
data.tar.gz: 910219122fb854cd0772a1b48a53c688f67d1a597482bed2462a0eda3109b01d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 35141392b47728e0ad929d97d1afc100f137465519d7da270f44fbb07e93ed8a8bd1bcdf99f14f26371da2d2f990d48c8565ef96cc8380293d90634b95011d53
|
7
|
+
data.tar.gz: 0cb4e44a168f4dc92cfee10dabc938765981de4823e852f96649e6a8ffad1f61705ac5c722270ea7ba26ec2077083870c12377e1e47d1fb7442613f1598b3269
|
data/README.md
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
# README
|
2
|
+
|
3
|
+
## Name
|
4
|
+
|
5
|
+
Red Arrow DuckDB
|
6
|
+
|
7
|
+
## Description
|
8
|
+
|
9
|
+
Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
|
10
|
+
|
11
|
+
## Install
|
12
|
+
|
13
|
+
```bash
|
14
|
+
gem install red-arrow-duckdb
|
15
|
+
```
|
16
|
+
|
17
|
+
## Usage
|
18
|
+
|
19
|
+
### Receive result as Apache Arrow data
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
require "arrow-duckdb"
|
23
|
+
|
24
|
+
DuckDB::Database.open do |db|
|
25
|
+
db.connect do |connection|
|
26
|
+
connection.query('CREATE TABLE users (id INTEGER, name VARCHAR(30))')
|
27
|
+
|
28
|
+
connection.query("INSERT into users VALUES(1, 'Alice')")
|
29
|
+
connection.query("INSERT into users VALUES(2, 'Bob')")
|
30
|
+
connection.query("INSERT into users VALUES(3, 'Cathy')")
|
31
|
+
|
32
|
+
result = connection.query("SELECT * FROM users", output: :arrow)
|
33
|
+
puts(result.to_table)
|
34
|
+
# id name
|
35
|
+
# 0 1 Alice
|
36
|
+
# 1 2 Bob
|
37
|
+
# 2 3 Cathy
|
38
|
+
end
|
39
|
+
end
|
40
|
+
```
|
41
|
+
|
42
|
+
### Use Apache Arrow data as input
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
require "arrow-duckdb"
|
46
|
+
|
47
|
+
users = Arrow::Table.new("id" => [1, 2, 3],
|
48
|
+
"name" => ["Alice", "Bob", "Cathy"])
|
49
|
+
DuckDB::Database.open do |db|
|
50
|
+
db.connect do |connection|
|
51
|
+
connection.register("users", users) do
|
52
|
+
connection.query("SELECT * FROM users").each do |row|
|
53
|
+
p row
|
54
|
+
# ["1", "Alice"]
|
55
|
+
# ["2", "Bob"]
|
56
|
+
# ["3", "Cathy"]
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
```
|
62
|
+
|
63
|
+
### Filter Apache Arrow data by DuckDB
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
require "arrow-duckdb"
|
67
|
+
|
68
|
+
users = Arrow::Table.new("id" => [1, 2, 3],
|
69
|
+
"name" => ["Alice", "Bob", "Cathy"])
|
70
|
+
DuckDB::Database.open do |db|
|
71
|
+
db.connect do |connection|
|
72
|
+
filtered_users = connection.register("users", users) do
|
73
|
+
result = connection.query("SELECT * FROM users WHERE id > ?",
|
74
|
+
1,
|
75
|
+
output: :arrow)
|
76
|
+
result.to_table
|
77
|
+
end
|
78
|
+
puts(filtered_users)
|
79
|
+
# id name
|
80
|
+
# 0 2 Bob
|
81
|
+
# 1 3 Cathy
|
82
|
+
|
83
|
+
# Use filtered data again
|
84
|
+
connection.register("filtered_users", filtered_users) do
|
85
|
+
result = connection.query("SELECT * FROM filtered_users",
|
86
|
+
output: :arrow)
|
87
|
+
puts(result.to_table)
|
88
|
+
# id name
|
89
|
+
# 0 2 Bob
|
90
|
+
# 1 3 Cathy
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
```
|
95
|
+
|
96
|
+
## Dependencies
|
97
|
+
|
98
|
+
* [Red Arrow](https://github.com/apache/arrow/tree/master/ruby/red-arrow)
|
99
|
+
|
100
|
+
* [ruby-duckdb](https://github.com/suketa/ruby-duckdb)
|
101
|
+
|
102
|
+
## Authors
|
103
|
+
|
104
|
+
* Sutou Kouhei \<kou@clear-code.com\>
|
105
|
+
|
106
|
+
## License
|
107
|
+
|
108
|
+
Apache License 2.0. See doc/text/apache-2.0.txt for details.
|
109
|
+
|
110
|
+
(Sutou Kouhei has a right to change the license including contributed
|
111
|
+
patches.)
|
data/doc/text/apache-2.0.txt
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
|
2
|
+
Apache License
|
3
|
+
Version 2.0, January 2004
|
4
|
+
http://www.apache.org/licenses/
|
5
|
+
|
6
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7
|
+
|
8
|
+
1. Definitions.
|
9
|
+
|
10
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
11
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12
|
+
|
13
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14
|
+
the copyright owner that is granting the License.
|
15
|
+
|
16
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
17
|
+
other entities that control, are controlled by, or are under common
|
18
|
+
control with that entity. For the purposes of this definition,
|
19
|
+
"control" means (i) the power, direct or indirect, to cause the
|
20
|
+
direction or management of such entity, whether by contract or
|
21
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23
|
+
|
24
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25
|
+
exercising permissions granted by this License.
|
26
|
+
|
27
|
+
"Source" form shall mean the preferred form for making modifications,
|
28
|
+
including but not limited to software source code, documentation
|
29
|
+
source, and configuration files.
|
30
|
+
|
31
|
+
"Object" form shall mean any form resulting from mechanical
|
32
|
+
transformation or translation of a Source form, including but
|
33
|
+
not limited to compiled object code, generated documentation,
|
34
|
+
and conversions to other media types.
|
35
|
+
|
36
|
+
"Work" shall mean the work of authorship, whether in Source or
|
37
|
+
Object form, made available under the License, as indicated by a
|
38
|
+
copyright notice that is included in or attached to the work
|
39
|
+
(an example is provided in the Appendix below).
|
40
|
+
|
41
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42
|
+
form, that is based on (or derived from) the Work and for which the
|
43
|
+
editorial revisions, annotations, elaborations, or other modifications
|
44
|
+
represent, as a whole, an original work of authorship. For the purposes
|
45
|
+
of this License, Derivative Works shall not include works that remain
|
46
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47
|
+
the Work and Derivative Works thereof.
|
48
|
+
|
49
|
+
"Contribution" shall mean any work of authorship, including
|
50
|
+
the original version of the Work and any modifications or additions
|
51
|
+
to that Work or Derivative Works thereof, that is intentionally
|
52
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
55
|
+
means any form of electronic, verbal, or written communication sent
|
56
|
+
to the Licensor or its representatives, including but not limited to
|
57
|
+
communication on electronic mailing lists, source code control systems,
|
58
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
59
|
+
Licensor for the purpose of discussing and improving the Work, but
|
60
|
+
excluding communication that is conspicuously marked or otherwise
|
61
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
62
|
+
|
63
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64
|
+
on behalf of whom a Contribution has been received by Licensor and
|
65
|
+
subsequently incorporated within the Work.
|
66
|
+
|
67
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68
|
+
this License, each Contributor hereby grants to You a perpetual,
|
69
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70
|
+
copyright license to reproduce, prepare Derivative Works of,
|
71
|
+
publicly display, publicly perform, sublicense, and distribute the
|
72
|
+
Work and such Derivative Works in Source or Object form.
|
73
|
+
|
74
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75
|
+
this License, each Contributor hereby grants to You a perpetual,
|
76
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77
|
+
(except as stated in this section) patent license to make, have made,
|
78
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79
|
+
where such license applies only to those patent claims licensable
|
80
|
+
by such Contributor that are necessarily infringed by their
|
81
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
82
|
+
with the Work to which such Contribution(s) was submitted. If You
|
83
|
+
institute patent litigation against any entity (including a
|
84
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85
|
+
or a Contribution incorporated within the Work constitutes direct
|
86
|
+
or contributory patent infringement, then any patent licenses
|
87
|
+
granted to You under this License for that Work shall terminate
|
88
|
+
as of the date such litigation is filed.
|
89
|
+
|
90
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
91
|
+
Work or Derivative Works thereof in any medium, with or without
|
92
|
+
modifications, and in Source or Object form, provided that You
|
93
|
+
meet the following conditions:
|
94
|
+
|
95
|
+
(a) You must give any other recipients of the Work or
|
96
|
+
Derivative Works a copy of this License; and
|
97
|
+
|
98
|
+
(b) You must cause any modified files to carry prominent notices
|
99
|
+
stating that You changed the files; and
|
100
|
+
|
101
|
+
(c) You must retain, in the Source form of any Derivative Works
|
102
|
+
that You distribute, all copyright, patent, trademark, and
|
103
|
+
attribution notices from the Source form of the Work,
|
104
|
+
excluding those notices that do not pertain to any part of
|
105
|
+
the Derivative Works; and
|
106
|
+
|
107
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108
|
+
distribution, then any Derivative Works that You distribute must
|
109
|
+
include a readable copy of the attribution notices contained
|
110
|
+
within such NOTICE file, excluding those notices that do not
|
111
|
+
pertain to any part of the Derivative Works, in at least one
|
112
|
+
of the following places: within a NOTICE text file distributed
|
113
|
+
as part of the Derivative Works; within the Source form or
|
114
|
+
documentation, if provided along with the Derivative Works; or,
|
115
|
+
within a display generated by the Derivative Works, if and
|
116
|
+
wherever such third-party notices normally appear. The contents
|
117
|
+
of the NOTICE file are for informational purposes only and
|
118
|
+
do not modify the License. You may add Your own attribution
|
119
|
+
notices within Derivative Works that You distribute, alongside
|
120
|
+
or as an addendum to the NOTICE text from the Work, provided
|
121
|
+
that such additional attribution notices cannot be construed
|
122
|
+
as modifying the License.
|
123
|
+
|
124
|
+
You may add Your own copyright statement to Your modifications and
|
125
|
+
may provide additional or different license terms and conditions
|
126
|
+
for use, reproduction, or distribution of Your modifications, or
|
127
|
+
for any such Derivative Works as a whole, provided Your use,
|
128
|
+
reproduction, and distribution of the Work otherwise complies with
|
129
|
+
the conditions stated in this License.
|
130
|
+
|
131
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132
|
+
any Contribution intentionally submitted for inclusion in the Work
|
133
|
+
by You to the Licensor shall be under the terms and conditions of
|
134
|
+
this License, without any additional terms or conditions.
|
135
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136
|
+
the terms of any separate license agreement you may have executed
|
137
|
+
with Licensor regarding such Contributions.
|
138
|
+
|
139
|
+
6. Trademarks. This License does not grant permission to use the trade
|
140
|
+
names, trademarks, service marks, or product names of the Licensor,
|
141
|
+
except as required for reasonable and customary use in describing the
|
142
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
143
|
+
|
144
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145
|
+
agreed to in writing, Licensor provides the Work (and each
|
146
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148
|
+
implied, including, without limitation, any warranties or conditions
|
149
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151
|
+
appropriateness of using or redistributing the Work and assume any
|
152
|
+
risks associated with Your exercise of permissions under this License.
|
153
|
+
|
154
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
155
|
+
whether in tort (including negligence), contract, or otherwise,
|
156
|
+
unless required by applicable law (such as deliberate and grossly
|
157
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158
|
+
liable to You for damages, including any direct, indirect, special,
|
159
|
+
incidental, or consequential damages of any character arising as a
|
160
|
+
result of this License or out of the use or inability to use the
|
161
|
+
Work (including but not limited to damages for loss of goodwill,
|
162
|
+
work stoppage, computer failure or malfunction, or any and all
|
163
|
+
other commercial damages or losses), even if such Contributor
|
164
|
+
has been advised of the possibility of such damages.
|
165
|
+
|
166
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
168
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169
|
+
or other liability obligations and/or rights consistent with this
|
170
|
+
License. However, in accepting such obligations, You may act only
|
171
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172
|
+
of any other Contributor, and only if You agree to indemnify,
|
173
|
+
defend, and hold each Contributor harmless for any liability
|
174
|
+
incurred by, or claims asserted against, such Contributor by reason
|
175
|
+
of your accepting any such warranty or additional liability.
|
176
|
+
|
177
|
+
END OF TERMS AND CONDITIONS
|
178
|
+
|
179
|
+
APPENDIX: How to apply the Apache License to your work.
|
180
|
+
|
181
|
+
To apply the Apache License to your work, attach the following
|
182
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
+
replaced with your own identifying information. (Don't include
|
184
|
+
the brackets!) The text should be enclosed in the appropriate
|
185
|
+
comment syntax for the file format. We also recommend that a
|
186
|
+
file or class name and description of purpose be included on the
|
187
|
+
same "printed page" as the copyright notice for easier
|
188
|
+
identification within third-party archives.
|
189
|
+
|
190
|
+
Copyright [yyyy] [name of copyright owner]
|
191
|
+
|
192
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
+
you may not use this file except in compliance with the License.
|
194
|
+
You may obtain a copy of the License at
|
195
|
+
|
196
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
+
|
198
|
+
Unless required by applicable law or agreed to in writing, software
|
199
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
+
See the License for the specific language governing permissions and
|
202
|
+
limitations under the License.
|
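data/doc/text/news.md
ADDED
@@ -0,0 +1,5 @@
(the content of news.md is missing from this extract)
data/ext/arrow-duckdb/arrow-duckdb-registration.cpp
ADDED
@@ -0,0 +1,238 @@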
|
|
1
|
+
/*
|
2
|
+
* Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <arrow-glib/arrow-glib.hpp>
|
18
|
+
|
19
|
+
#include <arrow/c/bridge.h>
|
20
|
+
#include <arrow/dataset/api.h>
|
21
|
+
|
22
|
+
#include <rbgobject.h>
|
23
|
+
|
24
|
+
#include <duckdb.hpp>
|
25
|
+
#ifndef DUCKDB_AMALGAMATION
|
26
|
+
# include <duckdb.h>
|
27
|
+
# include <duckdb/common/arrow_wrapper.hpp>
|
28
|
+
# include <duckdb/function/table_function.hpp>
|
29
|
+
# include <duckdb/main/connection.hpp>
|
30
|
+
# include <duckdb/planner/filter/conjunction_filter.hpp>
|
31
|
+
# include <duckdb/planner/filter/constant_filter.hpp>
|
32
|
+
# include <duckdb/planner/table_filter.hpp>
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#include "arrow-duckdb-registration.hpp"
|
36
|
+
|
37
|
+
namespace {
|
38
|
+
std::shared_ptr<arrow::Scalar>
|
39
|
+
convert_constant(duckdb::Value &value)
|
40
|
+
{
|
41
|
+
switch (value.type().id()) {
|
42
|
+
case duckdb::LogicalTypeId::BOOLEAN:
|
43
|
+
return arrow::MakeScalar(value.GetValue<bool>());
|
44
|
+
case duckdb::LogicalTypeId::TINYINT:
|
45
|
+
return arrow::MakeScalar(value.GetValue<int8_t>());
|
46
|
+
case duckdb::LogicalTypeId::SMALLINT:
|
47
|
+
return arrow::MakeScalar(value.GetValue<int16_t>());
|
48
|
+
case duckdb::LogicalTypeId::INTEGER:
|
49
|
+
return arrow::MakeScalar(value.GetValue<int32_t>());
|
50
|
+
case duckdb::LogicalTypeId::BIGINT:
|
51
|
+
return arrow::MakeScalar(value.GetValue<int64_t>());
|
52
|
+
// case duckdb::LogicalTypeId::HUGEINT:
|
53
|
+
// return arrow::MakeScalar(value.GetValue<duckdb::hugeint_t>());
|
54
|
+
// case duckdb::LogicalTypeId::DATE:
|
55
|
+
// return arrow::MakeScalar(arrow::date32(), value.GetValue<int32_t>());
|
56
|
+
// case duckdb::LogicalTypeId::TIME:
|
57
|
+
// return arrow::MakeScalar(arrow::time64(), value.GetValue<int64_t>());
|
58
|
+
// case duckdb::LogicalTypeId::TIMESTAMP:
|
59
|
+
// return arrow::MakeScalar(arrow::timestamp(),
|
60
|
+
// value.GetValue<int64_t>());
|
61
|
+
case duckdb::LogicalTypeId::UTINYINT:
|
62
|
+
return arrow::MakeScalar(value.GetValue<uint8_t>());
|
63
|
+
case duckdb::LogicalTypeId::USMALLINT:
|
64
|
+
return arrow::MakeScalar(value.GetValue<uint16_t>());
|
65
|
+
case duckdb::LogicalTypeId::UINTEGER:
|
66
|
+
return arrow::MakeScalar(value.GetValue<uint32_t>());
|
67
|
+
case duckdb::LogicalTypeId::UBIGINT:
|
68
|
+
return arrow::MakeScalar(value.GetValue<uint64_t>());
|
69
|
+
case duckdb::LogicalTypeId::FLOAT:
|
70
|
+
return arrow::MakeScalar(value.GetValue<float>());
|
71
|
+
case duckdb::LogicalTypeId::DOUBLE:
|
72
|
+
return arrow::MakeScalar(value.GetValue<double>());
|
73
|
+
case duckdb::LogicalTypeId::VARCHAR:
|
74
|
+
return arrow::MakeScalar(value.ToString());
|
75
|
+
// case LogicalTypeId::DECIMAL:
|
76
|
+
default:
|
77
|
+
throw duckdb::NotImplementedException(
|
78
|
+
"[arrow][filter][pushdown] not implemented value type: %s",
|
79
|
+
value.type().ToString());
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
// Translates one DuckDB table filter that applies to the column named
// `column_name` into an Apache Arrow compute expression.
//
// Handles constant comparisons (=, <, >, <=, >=), IS NULL, IS NOT NULL and
// AND/OR conjunctions (whose children are converted recursively against the
// same column). Any other filter or comparison type raises
// duckdb::NotImplementedException.
arrow::compute::Expression
convert_filter(duckdb::TableFilter *filter,
               std::string &column_name)
{
  namespace cp = arrow::compute;

  auto column = cp::field_ref(column_name);
  switch (filter->filter_type) {
  case duckdb::TableFilterType::IS_NULL:
    return cp::is_null(column);
  case duckdb::TableFilterType::IS_NOT_NULL:
    return cp::is_valid(column);
  case duckdb::TableFilterType::CONSTANT_COMPARISON:
    {
      auto comparison = static_cast<duckdb::ConstantFilter *>(filter);
      auto operand = cp::literal(convert_constant(comparison->constant));
      // Every branch below either returns or throws.
      switch (comparison->comparison_type) {
      case duckdb::ExpressionType::COMPARE_EQUAL:
        return cp::equal(column, operand);
      case duckdb::ExpressionType::COMPARE_LESSTHAN:
        return cp::less(column, operand);
      case duckdb::ExpressionType::COMPARE_GREATERTHAN:
        return cp::greater(column, operand);
      case duckdb::ExpressionType::COMPARE_LESSTHANOREQUALTO:
        return cp::less_equal(column, operand);
      case duckdb::ExpressionType::COMPARE_GREATERTHANOREQUALTO:
        return cp::greater_equal(column, operand);
      default:
        throw duckdb::NotImplementedException(
          "[arrow][filter][pushdown] not implemented comparison type: %s",
          duckdb::ExpressionTypeToString(comparison->comparison_type));
      }
    }
  case duckdb::TableFilterType::CONJUNCTION_AND:
    {
      auto conjunction = static_cast<duckdb::ConjunctionAndFilter *>(filter);
      std::vector<cp::Expression> operands;
      for (auto &child : conjunction->child_filters) {
        operands.push_back(convert_filter(child.get(), column_name));
      }
      return cp::and_(operands);
    }
  case duckdb::TableFilterType::CONJUNCTION_OR:
    {
      auto disjunction = static_cast<duckdb::ConjunctionOrFilter *>(filter);
      std::vector<cp::Expression> operands;
      for (auto &child : disjunction->child_filters) {
        operands.push_back(convert_filter(child.get(), column_name));
      }
      return cp::or_(operands);
    }
  default:
    throw duckdb::NotImplementedException(
      "[arrow][filter][pushdown] unknown filter type: %u",
      filter->filter_type);
  }
}
|
142
|
+
|
143
|
+
// Converts every pushed-down filter in `filters` (keyed by column index)
// and combines the results with a logical AND.
//
// `column_names` maps the same column indexes to Arrow field names. It is
// looked up with operator[], so an index without an entry gets an empty
// name inserted into the map.
arrow::compute::Expression
convert_filters(std::unordered_map<
                  idx_t,
                  std::unique_ptr<duckdb::TableFilter>
                > &filters,
                std::unordered_map<idx_t, std::string> &column_names)
{
  std::vector<arrow::compute::Expression> conjuncts;
  for (auto &entry : filters) {
    const idx_t column_index = entry.first;
    duckdb::TableFilter *column_filter = entry.second.get();
    conjuncts.push_back(
      convert_filter(column_filter, column_names[column_index]));
  }
  return arrow::compute::and_(conjuncts);
}
|
157
|
+
|
158
|
+
// Builds an Arrow C stream over the registered table for one DuckDB scan.
//
// `data` is the address of a GArrowTable. `project_columns.first` maps
// column indexes to names for filter conversion and
// `project_columns.second` lists the columns to project (no projection is
// applied when it is empty). `filters` may be null or empty, in which case
// no filter is applied.
//
// Returns the stream wrapper, or the first failing Arrow status.
arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
arrow_table_produce_internal(uintptr_t data,
                             std::pair<
                               std::unordered_map<idx_t, std::string>,
                               std::vector<std::string>
                             > &project_columns,
                             duckdb::TableFilterCollection *filters)
{
  auto &filter_column_names = project_columns.first;
  auto &projected_column_names = project_columns.second;

  // Wrap the table in an in-memory dataset so the dataset scanner can apply
  // the filter and the projection.
  auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
  auto arrow_table = garrow_table_get_raw(garrow_table);
  auto dataset =
    std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
  ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan());

  if (filters &&
      filters->table_filters &&
      !filters->table_filters->filters.empty()) {
    ARROW_RETURN_NOT_OK(
      scanner_builder->Filter(
        convert_filters(filters->table_filters->filters,
                        filter_column_names)));
  }
  if (!projected_column_names.empty()) {
    ARROW_RETURN_NOT_OK(scanner_builder->Project(projected_column_names));
  }

  ARROW_ASSIGN_OR_RAISE(auto scanner, scanner_builder->Finish());
  ARROW_ASSIGN_OR_RAISE(auto reader, scanner->ToRecordBatchReader());

  // Export the reader through the Arrow C stream interface into the
  // wrapper's stream.
  auto stream_wrapper = duckdb::make_unique<duckdb::ArrowArrayStreamWrapper>();
  ARROW_RETURN_NOT_OK(
    arrow::ExportRecordBatchReader(reader,
                                   &(stream_wrapper->arrow_array_stream)));
  return stream_wrapper;
}
|
191
|
+
|
192
|
+
std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
|
193
|
+
arrow_table_produce(uintptr_t data,
|
194
|
+
std::pair<
|
195
|
+
std::unordered_map<idx_t, std::string>,
|
196
|
+
std::vector<std::string>
|
197
|
+
> &project_columns,
|
198
|
+
duckdb::TableFilterCollection *filters)
|
199
|
+
{
|
200
|
+
auto stream_wrapper_result =
|
201
|
+
arrow_table_produce_internal(data, project_columns, filters);
|
202
|
+
if (!stream_wrapper_result.ok()) {
|
203
|
+
throw std::runtime_error(
|
204
|
+
std::string("[arrow][produce] failed to produce: ") +
|
205
|
+
stream_wrapper_result.status().ToString());
|
206
|
+
}
|
207
|
+
return std::move(*stream_wrapper_result);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
namespace arrow_duckdb {
|
212
|
+
void
|
213
|
+
connection_unregister(duckdb_connection connection, VALUE name)
|
214
|
+
{
|
215
|
+
auto c_name = StringValueCStr(name);
|
216
|
+
reinterpret_cast<duckdb::Connection *>(connection)
|
217
|
+
->Query(std::string("DROP VIEW \"") + c_name + "\"");
|
218
|
+
}
|
219
|
+
|
220
|
+
void
|
221
|
+
connection_register(duckdb_connection connection,
|
222
|
+
VALUE name,
|
223
|
+
VALUE arrow_table)
|
224
|
+
{
|
225
|
+
auto c_name = StringValueCStr(name);
|
226
|
+
auto garrow_table = RVAL2GOBJ(arrow_table);
|
227
|
+
const idx_t rows_per_tuple = 1000000;
|
228
|
+
reinterpret_cast<duckdb::Connection *>(connection)
|
229
|
+
->TableFunction(
|
230
|
+
"arrow_scan",
|
231
|
+
{
|
232
|
+
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(garrow_table)),
|
233
|
+
duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_produce)),
|
234
|
+
duckdb::Value::UBIGINT(rows_per_tuple)
|
235
|
+
})
|
236
|
+
->CreateView(c_name, true, true);
|
237
|
+
}
|
238
|
+
}
|
data/ext/arrow-duckdb/arrow-duckdb-registration.hpp
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#pragma once
|
18
|
+
|
19
|
+
namespace arrow_duckdb {
|
20
|
+
void
|
21
|
+
connection_unregister(duckdb_connection connection, VALUE name);
|
22
|
+
void
|
23
|
+
connection_register(duckdb_connection connection,
|
24
|
+
VALUE name,
|
25
|
+
VALUE arrow_table);
|
26
|
+
}
|
27
|
+
|
data/ext/arrow-duckdb/arrow-duckdb.cpp
ADDED
@@ -0,0 +1,404 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <arrow-glib/arrow-glib.hpp>
|
18
|
+
|
19
|
+
#include <arrow/c/bridge.h>
|
20
|
+
|
21
|
+
#include <rbgobject.h>
|
22
|
+
|
23
|
+
#include <ruby-duckdb.h>
|
24
|
+
|
25
|
+
#include "arrow-duckdb-registration.hpp"
|
26
|
+
|
27
|
+
extern "C" void Init_arrow_duckdb(void);
|
28
|
+
|
29
|
+
namespace {
|
30
|
+
# if !GARROW_VERSION_CHECK(6, 0, 0)
|
31
|
+
// Fallback used when Apache Arrow GLib is older than 6.0.0 (see the
// surrounding GARROW_VERSION_CHECK guard): imports a C data interface
// schema as a GArrowSchema.
//
// `c_abi_schema` must point to an ArrowSchema. On success a new
// GArrowSchema is returned. On failure nullptr is returned and `error` is
// set, instead of dereferencing a failed arrow::Result (which aborts the
// process).
GArrowSchema *
garrow_schema_import(gpointer c_abi_schema,
                     GError **error)
{
  auto arrow_schema_result =
    arrow::ImportSchema(static_cast<ArrowSchema *>(c_abi_schema));
  if (!garrow_error_check(error,
                          arrow_schema_result.status(),
                          "[schema][import]")) {
    return nullptr;
  }
  auto arrow_schema = *arrow_schema_result;
  return garrow_schema_new_raw(&arrow_schema);
}
|
39
|
+
|
40
|
+
// Fallback used when Apache Arrow GLib is older than 6.0.0 (see the
// surrounding GARROW_VERSION_CHECK guard): imports a C data interface array
// as a GArrowRecordBatch that uses `schema`.
//
// `c_abi_array` must point to an ArrowArray. On success a new
// GArrowRecordBatch is returned. On failure nullptr is returned and `error`
// is set, instead of dereferencing a failed arrow::Result (which aborts the
// process).
GArrowRecordBatch *
garrow_record_batch_import(gpointer c_abi_array,
                           GArrowSchema *schema,
                           GError **error)
{
  auto arrow_schema = garrow_schema_get_raw(schema);
  auto arrow_record_batch_result =
    arrow::ImportRecordBatch(static_cast<ArrowArray *>(c_abi_array),
                             arrow_schema);
  if (!garrow_error_check(error,
                          arrow_record_batch_result.status(),
                          "[record-batch][import]")) {
    return nullptr;
  }
  auto arrow_record_batch = *arrow_record_batch_result;
  return garrow_record_batch_new_raw(&arrow_record_batch);
}
|
51
|
+
# endif
|
52
|
+
|
53
|
+
VALUE cArrowTable;
|
54
|
+
VALUE cArrowDuckDBResult;
|
55
|
+
|
56
|
+
struct Result {
|
57
|
+
duckdb_arrow arrow;
|
58
|
+
char *error_message;
|
59
|
+
GArrowSchema *gschema;
|
60
|
+
};
|
61
|
+
|
62
|
+
void
|
63
|
+
result_free(void *data)
|
64
|
+
{
|
65
|
+
Result *result = static_cast<Result *>(data);
|
66
|
+
if (result->gschema) {
|
67
|
+
g_object_unref(result->gschema);
|
68
|
+
}
|
69
|
+
free(result->error_message);
|
70
|
+
duckdb_destroy_arrow(&(result->arrow));
|
71
|
+
}
|
72
|
+
|
73
|
+
// Ruby typed-data descriptor for objects that wrap a Result.
static const rb_data_type_t result_type = {
  "ArrowDuckDB::Result",       // name of the wrapped struct
  {
    nullptr,                   // no mark function
    result_free,               // free function
  },
  nullptr,                     // parent
  nullptr,                     // data
  RUBY_TYPED_FREE_IMMEDIATELY, // flags
};
|
83
|
+
|
84
|
+
// Allocator for `klass`: creates a Ruby object wrapping a new Result whose
// members are all explicitly reset to nullptr.
VALUE
result_alloc_func(VALUE klass)
{
  Result *payload;
  VALUE rb_result =
    TypedData_Make_Struct(klass, Result, &result_type, payload);

  payload->gschema = nullptr;
  payload->error_message = nullptr;
  payload->arrow = nullptr;

  return rb_result;
}
|
97
|
+
|
98
|
+
void
|
99
|
+
result_ensure_gschema(Result *result)
|
100
|
+
{
|
101
|
+
ArrowSchema c_abi_schema;
|
102
|
+
duckdb_arrow_schema schema = &c_abi_schema;
|
103
|
+
auto state = duckdb_query_arrow_schema(result->arrow, &schema);
|
104
|
+
if (state == DuckDBError) {
|
105
|
+
free(result->error_message);
|
106
|
+
result->error_message =
|
107
|
+
const_cast<char *>(duckdb_query_arrow_error(result->arrow));
|
108
|
+
rb_raise(eDuckDBError,
|
109
|
+
"Failed to fetch Apache Arrow schema: %s",
|
110
|
+
result->error_message);
|
111
|
+
}
|
112
|
+
|
113
|
+
GError *gerror = nullptr;
|
114
|
+
result->gschema = garrow_schema_import(&c_abi_schema, &gerror);
|
115
|
+
if (gerror) {
|
116
|
+
RG_RAISE_ERROR(gerror);
|
117
|
+
}
|
118
|
+
}
|
119
|
+
|
120
|
+
// Fetches the next record batch of `result`.
//
// `self` is accepted for symmetry with the callers but is not used.
// `result->gschema` must already be set (see result_ensure_gschema()).
//
// Returns the batch as a Ruby object, or Qnil when DuckDB produced no
// array (its release callback is unset). Raises eDuckDBError when the
// fetch fails and a GLib-derived Ruby error when the import fails.
VALUE
result_fetch_internal(VALUE self, Result *result)
{
  ArrowArray c_abi_array = {};
  duckdb_arrow_array array = &c_abi_array;
  auto state = duckdb_query_arrow_array(result->arrow, &array);
  if (state == DuckDBError) {
    // The message belongs to the DuckDB result, so it is only borrowed here
    // and never stored in result->error_message, which is later passed to
    // free().
    const char *message = duckdb_query_arrow_error(result->arrow);
    rb_raise(eDuckDBError,
             "Failed to fetch Apache Arrow array: %s",
             message);
  }
  if (!c_abi_array.release) {
    return Qnil;
  }

  GError *gerror = nullptr;
  auto grecord_batch = garrow_record_batch_import(&c_abi_array,
                                                  result->gschema,
                                                  &gerror);
  if (gerror) {
    // Raises a Ruby exception and does not return.
    RG_RAISE_ERROR(gerror);
  }
  return GOBJ2RVAL_UNREF(grecord_batch);
}
|
148
|
+
|
149
|
+
// ArrowDuckDB::Result#fetch
//
// Returns the next record batch of the result, or nil when
// result_fetch_internal() reports that nothing is left.
VALUE
result_fetch(VALUE self)
{
  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  result_ensure_gschema(state);
  return result_fetch_internal(self, state);
}
|
159
|
+
|
160
|
+
// ArrowDuckDB::Result#each
//
// Yields every remaining record batch to the block and returns self.
// Without a block an enumerator is returned instead.
VALUE
result_each(VALUE self)
{
  RETURN_ENUMERATOR(self, 0, 0);

  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  result_ensure_gschema(state);

  // Keep pulling batches until the fetch helper answers nil.
  for (auto batch = result_fetch_internal(self, state);
       !NIL_P(batch);
       batch = result_fetch_internal(self, state)) {
    rb_yield(batch);
  }

  return self;
}
|
180
|
+
|
181
|
+
// ArrowDuckDB::Result#schema
//
// Returns the result's Arrow schema as a Ruby object, importing it first
// when necessary.
VALUE
result_schema(VALUE self)
{
  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  result_ensure_gschema(state);
  auto gschema = state->gschema;
  return GOBJ2RVAL(gschema);
}
|
191
|
+
|
192
|
+
// ArrowDuckDB::Result#n_columns
//
// Returns duckdb_arrow_column_count() for this result as a Ruby Integer.
VALUE
result_n_columns(VALUE self)
{
  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  const auto n_columns = duckdb_arrow_column_count(state->arrow);
  return ULL2NUM(n_columns);
}
|
200
|
+
|
201
|
+
// ArrowDuckDB::Result#n_rows
//
// Returns duckdb_arrow_row_count() for this result as a Ruby Integer.
VALUE
result_n_rows(VALUE self)
{
  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  const auto n_rows = duckdb_arrow_row_count(state->arrow);
  return ULL2NUM(n_rows);
}
|
209
|
+
|
210
|
+
// ArrowDuckDB::Result#n_changed_rows
//
// Returns duckdb_arrow_rows_changed() for this result as a Ruby Integer.
VALUE
result_n_changed_rows(VALUE self)
{
  Result *state;
  TypedData_Get_Struct(self, Result, &result_type, state);
  const auto n_changed_rows = duckdb_arrow_rows_changed(state->arrow);
  return ULL2NUM(n_changed_rows);
}
|
218
|
+
|
219
|
+
// DuckDB::Connection#query_sql_arrow(sql)
//
// Runs `sql` on the connection through DuckDB's Arrow interface and
// returns a new ArrowDuckDB::Result that owns the DuckDB result handle.
// Raises DuckDB::Error when the connection is closed or the query fails.
VALUE
query_sql_arrow(VALUE self, VALUE sql)
{
  rubyDuckDBConnection *connection;
  Data_Get_Struct(self, rubyDuckDBConnection, connection);
  if (!(connection->con)) {
    rb_raise(eDuckDBError, "Database connection closed");
  }

  ID id_new;
  CONST_ID(id_new, "new");
  auto rb_result = rb_funcall(cArrowDuckDBResult, id_new, 0);
  Result *holder;
  TypedData_Get_Struct(rb_result, Result, &result_type, holder);

  auto state = duckdb_query_arrow(connection->con,
                                  StringValueCStr(sql),
                                  &(holder->arrow));
  if (state == DuckDBError) {
    // Without a result handle there is no error text to report.
    if (!holder->arrow) {
      rb_raise(eDuckDBError, "Failed to execute query");
    }
    holder->error_message =
      const_cast<char *>(duckdb_query_arrow_error(holder->arrow));
    rb_raise(eDuckDBError,
             "Failed to execute query: %s",
             holder->error_message);
  }

  return rb_result;
}
|
252
|
+
|
253
|
+
// DuckDB::Connection#unregister_arrow(name)
//
// Unregisters the Arrow table known as `name` from the connection and
// removes it from the @arrow_tables hash kept on the Ruby object.
// Returns self. Raises DuckDB::Error when the connection is closed.
VALUE
query_unregister_arrow(VALUE self, VALUE name)
{
  rubyDuckDBConnection *connection;
  Data_Get_Struct(self, rubyDuckDBConnection, connection);
  if (!(connection->con)) {
    rb_raise(eDuckDBError, "Database connection closed");
  }

  arrow_duckdb::connection_unregister(connection->con, name);

  // @arrow_tables is created on demand, exactly as in register_arrow.
  auto registered = rb_iv_get(self, "@arrow_tables");
  if (NIL_P(registered)) {
    registered = rb_hash_new();
    rb_iv_set(self, "@arrow_tables", registered);
  }
  rb_hash_delete(registered, name);

  return self;
}
|
274
|
+
|
275
|
+
// rb_ensure() body used by query_register_arrow(): yields to the caller's
// block with no arguments and returns the block's value.
VALUE
query_register_arrow_body(VALUE /* unused */)
{
  return rb_yield_values(0);
}
|
280
|
+
|
281
|
+
struct QueryRegisterArrowData {
|
282
|
+
VALUE self;
|
283
|
+
VALUE name;
|
284
|
+
};
|
285
|
+
|
286
|
+
// rb_ensure() cleanup used by query_register_arrow(): unregisters the
// table described by the QueryRegisterArrowData behind `user_data`.
VALUE
query_register_arrow_ensure(VALUE user_data)
{
  auto context = reinterpret_cast<QueryRegisterArrowData *>(user_data);
  const VALUE connection = context->self;
  const VALUE table_name = context->name;
  return query_unregister_arrow(connection, table_name);
}
|
292
|
+
|
293
|
+
// DuckDB::Connection#register_arrow(name, arrow_table)
//
// Registers `arrow_table` on the connection under `name` and remembers it
// in the @arrow_tables hash of the Ruby object.
//
// With a block, the block is run and the table is unregistered again
// afterwards (also when the block raises); the block's value is returned.
// Without a block, self is returned and the table stays registered.
//
// Raises DuckDB::Error when the connection is closed and ArgumentError
// when `arrow_table` is not an Arrow::Table.
VALUE
query_register_arrow(VALUE self, VALUE name, VALUE arrow_table)
{
  rubyDuckDBConnection *connection;
  Data_Get_Struct(self, rubyDuckDBConnection, connection);
  if (!(connection->con)) {
    rb_raise(eDuckDBError, "Database connection closed");
  }

  const auto is_table = rb_obj_is_kind_of(arrow_table, cArrowTable);
  if (!RVAL2CBOOL(is_table)) {
    rb_raise(rb_eArgError, "must be Arrow::Table: %" PRIsVALUE, arrow_table);
  }

  arrow_duckdb::connection_register(connection->con, name, arrow_table);

  auto registered = rb_iv_get(self, "@arrow_tables");
  if (NIL_P(registered)) {
    registered = rb_hash_new();
    rb_iv_set(self, "@arrow_tables", registered);
  }
  rb_hash_aset(registered, name, arrow_table);

  if (!rb_block_given_p()) {
    return self;
  }

  QueryRegisterArrowData data;
  data.self = self;
  data.name = name;
  return rb_ensure(query_register_arrow_body,
                   Qnil,
                   query_register_arrow_ensure,
                   reinterpret_cast<VALUE>(&data));
}
|
328
|
+
|
329
|
+
// DuckDB::PreparedStatement#execute_arrow
//
// Executes the prepared statement through DuckDB's Arrow interface and
// returns a new ArrowDuckDB::Result that owns the DuckDB result handle.
// Raises DuckDB::Error when execution fails.
VALUE
prepared_statement_execute_arrow(VALUE self)
{
  rubyDuckDBPreparedStatement *statement;
  Data_Get_Struct(self, rubyDuckDBPreparedStatement, statement);

  ID id_new;
  CONST_ID(id_new, "new");
  auto rb_result = rb_funcall(cArrowDuckDBResult, id_new, 0);
  Result *holder;
  TypedData_Get_Struct(rb_result, Result, &result_type, holder);

  auto state = duckdb_execute_prepared_arrow(statement->prepared_statement,
                                             &(holder->arrow));
  if (state == DuckDBError) {
    // Without a result handle there is no error text to report.
    if (!holder->arrow) {
      rb_raise(eDuckDBError, "Failed to execute prepared statement");
    }
    holder->error_message =
      const_cast<char *>(duckdb_query_arrow_error(holder->arrow));
    rb_raise(eDuckDBError,
             "Failed to execute prepared statement: %s",
             holder->error_message);
  }

  return rb_result;
}
|
358
|
+
|
359
|
+
void init()
|
360
|
+
{
|
361
|
+
cArrowTable = rb_const_get(rb_const_get(rb_cObject, rb_intern("Arrow")),
|
362
|
+
rb_intern("Table"));
|
363
|
+
|
364
|
+
auto mArrowDuckDB = rb_define_module("ArrowDuckDB");
|
365
|
+
cArrowDuckDBResult = rb_define_class_under(mArrowDuckDB,
|
366
|
+
"Result",
|
367
|
+
rb_cObject);
|
368
|
+
rb_define_alloc_func(cArrowDuckDBResult, result_alloc_func);
|
369
|
+
rb_include_module(cArrowDuckDBResult, rb_mEnumerable);
|
370
|
+
rb_define_method(cArrowDuckDBResult, "fetch", result_fetch, 0);
|
371
|
+
rb_define_method(cArrowDuckDBResult, "each", result_each, 0);
|
372
|
+
rb_define_method(cArrowDuckDBResult, "schema", result_schema, 0);
|
373
|
+
rb_define_method(cArrowDuckDBResult, "n_columns", result_n_columns, 0);
|
374
|
+
rb_define_method(cArrowDuckDBResult, "n_rows", result_n_rows, 0);
|
375
|
+
rb_define_method(cArrowDuckDBResult,
|
376
|
+
"n_changed_rows",
|
377
|
+
result_n_changed_rows,
|
378
|
+
0);
|
379
|
+
|
380
|
+
rb_define_method(cDuckDBConnection, "query_sql_arrow", query_sql_arrow, 1);
|
381
|
+
rb_define_method(cDuckDBConnection,
|
382
|
+
"register_arrow",
|
383
|
+
query_register_arrow,
|
384
|
+
2);
|
385
|
+
rb_define_method(cDuckDBConnection,
|
386
|
+
"unregister_arrow",
|
387
|
+
query_unregister_arrow,
|
388
|
+
1);
|
389
|
+
|
390
|
+
auto cDuckDBPreparedStatement =
|
391
|
+
rb_const_get(rb_const_get(rb_cObject, rb_intern("DuckDB")),
|
392
|
+
rb_intern("PreparedStatement"));
|
393
|
+
rb_define_method(cDuckDBPreparedStatement,
|
394
|
+
"execute_arrow",
|
395
|
+
prepared_statement_execute_arrow,
|
396
|
+
0);
|
397
|
+
}
|
398
|
+
}
|
399
|
+
|
400
|
+
extern "C" void
|
401
|
+
Init_arrow_duckdb(void)
|
402
|
+
{
|
403
|
+
init();
|
404
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
require "extpp"
require "mkmf-gnome"

# pkg-config packages the extension is built against.
%w[arrow-glib arrow-dataset].each do |package|
  required_pkg_config_package(package)
end

# Gems whose extension directory is added as a dependency package path.
# The same directory serves as both source and build directory.
{
  "glib2" => "ext/glib2",
  "duckdb" => "ext/duckdb",
}.each do |gem_name, relative_dir|
  gem_root = find_gem_spec(gem_name).full_gem_path
  ext_dir = File.join(gem_root, relative_dir)
  add_depend_package_path(gem_name, ext_dir, ext_dir)
end

# Stop with a failure status when libduckdb cannot be linked.
exit(false) unless have_library("duckdb")

create_makefile("arrow_duckdb")
|
data/lib/arrow-duckdb.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
require "arrow"
|
16
|
+
require "duckdb"
|
17
|
+
|
18
|
+
require "arrow-duckdb/version"
|
19
|
+
|
20
|
+
require "arrow_duckdb.so"
|
21
|
+
|
22
|
+
require "arrow-duckdb/connection"
|
23
|
+
require "arrow-duckdb/result"
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module ArrowDuckDB
  # Mixin that teaches `#query` an `output: :arrow` option.
  module ArrowableQuery
    # Runs +sql+, optionally returning the result as Apache Arrow data.
    #
    # @param sql [String] the statement to run
    # @param args [Array] values bound to the statement's placeholders,
    #   starting at position 1
    # @param output [Symbol, nil] pass +:arrow+ for an Arrow result; any
    #   other value delegates to the original +#query+
    # @return [Object] whatever the chosen execution path returns
    def query(sql, *args, output: nil)
      return super(sql, *args) unless output == :arrow
      return query_sql_arrow(sql) if args.empty?

      statement = DuckDB::PreparedStatement.new(self, sql)
      args.each.with_index(1) do |value, position|
        statement.bind(position, value)
      end
      statement.execute_arrow
    end
  end
end
|
30
|
+
|
31
|
+
module DuckDB
  class Connection
    # Prepended so that ArrowableQuery#query runs before the stock #query
    # and can handle `output: :arrow`.
    prepend ArrowDuckDB::ArrowableQuery

    # Make Arrow table registration available under the shorter #register.
    alias register register_arrow
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module ArrowDuckDB
  class Result
    # Builds an Arrow::Table from this result's schema and all of the
    # record batches it yields.
    #
    # @return [Arrow::Table]
    def to_table
      table_schema = schema
      record_batches = to_a
      Arrow::Table.new(table_schema, record_batches)
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
module ArrowDuckDB
  # Release version of the gem; the gemspec assigns it to `spec.version`.
  VERSION = "1.0.0"
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright 2021 Sutou Kouhei <kou@clear-code.com>
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
|
17
|
+
# Strips leading and trailing blank lines from a README section and
# terminates it with exactly one newline.
clean_white_space = lambda do |entry|
  entry.gsub(/(\A\n+|\n+\z)/, '') + "\n"
end

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
require "arrow-duckdb/version"

Gem::Specification.new do |spec|
  spec.name = "red-arrow-duckdb"
  spec.version = ArrowDuckDB::VERSION
  spec.homepage = "https://github.com/red-data-tools/red-arrow-duckdb"
  spec.authors = ["Sutou Kouhei"]
  spec.email = ["kou@clear-code.com"]

  readme = File.read("README.md")
  readme.force_encoding("UTF-8")
  # Splitting on "## Heading" lines (with the heading captured) gives
  # [preamble, heading, body, heading, body, ...], so the text of a section
  # is the element right after its heading.
  entries = readme.split(/^\#\#\s(.*)$/)
  description = clean_white_space.call(entries[entries.index("Description") + 1])
  # First paragraph is the summary, the second one (if any) the description.
  summary, detail, = description.split(/\n\n+/, 3)
  spec.summary = summary
  # A single-paragraph Description section used to leave the gem with an
  # empty description; reuse the summary in that case.
  spec.description = detail || summary.to_s.strip
  spec.license = "Apache-2.0"
  spec.files = ["README.md", "#{spec.name}.gemspec"]
  spec.files += Dir.glob("lib/**/*.rb")
  spec.files += Dir.glob("ext/**/*.{cpp,hpp}")
  spec.files += Dir.glob("doc/text/*")
  spec.extensions = ["ext/arrow-duckdb/extconf.rb"]

  spec.add_runtime_dependency("duckdb")
  spec.add_runtime_dependency("red-arrow")

  spec.add_development_dependency("bundler")
  spec.add_development_dependency("rake")
  spec.add_development_dependency("test-unit")
end
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: red-arrow-duckdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sutou Kouhei
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-08-12 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: duckdb
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: red-arrow
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: test-unit
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: ''
|
84
|
+
email:
|
85
|
+
- kou@clear-code.com
|
86
|
+
executables: []
|
87
|
+
extensions:
|
88
|
+
- ext/arrow-duckdb/extconf.rb
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- README.md
|
92
|
+
- doc/text/apache-2.0.txt
|
93
|
+
- doc/text/news.md
|
94
|
+
- ext/arrow-duckdb/arrow-duckdb-registration.cpp
|
95
|
+
- ext/arrow-duckdb/arrow-duckdb-registration.hpp
|
96
|
+
- ext/arrow-duckdb/arrow-duckdb.cpp
|
97
|
+
- ext/arrow-duckdb/extconf.rb
|
98
|
+
- lib/arrow-duckdb.rb
|
99
|
+
- lib/arrow-duckdb/connection.rb
|
100
|
+
- lib/arrow-duckdb/result.rb
|
101
|
+
- lib/arrow-duckdb/version.rb
|
102
|
+
- red-arrow-duckdb.gemspec
|
103
|
+
homepage: https://github.com/red-data-tools/red-arrow-duckdb
|
104
|
+
licenses:
|
105
|
+
- Apache-2.0
|
106
|
+
metadata: {}
|
107
|
+
post_install_message:
|
108
|
+
rdoc_options: []
|
109
|
+
require_paths:
|
110
|
+
- lib
|
111
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - ">="
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
requirements: []
|
122
|
+
rubygems_version: 3.3.0.dev
|
123
|
+
signing_key:
|
124
|
+
specification_version: 4
|
125
|
+
summary: Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
|
126
|
+
test_files: []
|