red-arrow-duckdb 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ac2fc2780b82eadb1ac59bbd1af1427ef6522c4ce97c4a51c8d3dbfe7181763f
4
- data.tar.gz: 4c75ea339bca32b4b3249faad455c7383ff700d7dd7d338804a91bd8ad4c46da
3
+ metadata.gz: 63fb3f7af8e84c386b1dcec7309b967d507ec85a65355471167c15426a5cf2f3
4
+ data.tar.gz: 9ae59ace06c47648b5fd4fa14723773de03f80c1f51f121e1b8fa3b225d1c56a
5
5
  SHA512:
6
- metadata.gz: a1963496e56b1868cac0c462758250110c4a95d3bc1f990cb67f7e26da11db2f85ebe1a3edfcf3109454ff43a8b4012b9fb807164eb6fd089fe0ee176ef6718b
7
- data.tar.gz: 637dd0226663c59358a13b6545544c49c149221d6e97b150d12751a50368e2c30dd8887e2b267728c10a0f228384fc17244ada2cf19cead1e8c51e5525ce32ca
6
+ metadata.gz: b63175c712cce70c9a48f2becb6581ff05ac5d153257bd740a178626222508f3f74c5bdab9e3618bc977e0b605fc79aa181c81cce8cfeaa33671d2ccf07bf763
7
+ data.tar.gz: 350c3eb4925cc5bd76af437c977f0cc8959c3ed91fcb4f119cb85c1c6f43a8709cb166dd993e53e9088edd4747b5950b875f0cc3548b03e512813c43b023e207
data/doc/text/news.md CHANGED
@@ -1,5 +1,29 @@
1
1
  # News
2
2
 
3
+ ## 1.0.3 - 2024-10-11
4
+
5
+ ### Improvements
6
+
7
+ * Added support for DuckDB 1.1.0
8
+ * GH-4
9
+ * Patch by Sten Larsson
10
+
11
+ ### Thanks
12
+
13
+ * Sten Larsson
14
+
15
+ ## 1.0.2 - 2022-03-06
16
+
17
+ ### Improvements
18
+
19
+ * Added support for timestamp value pushdown.
20
+ [GitHub#2](https://github.com/red-data-tools/red-arrow-duckdb/issues/2)
21
+ [Reported by Torsten Sprenger]
22
+
23
+ ### Thanks
24
+
25
+ * Torsten Sprenger
26
+
3
27
  ## 1.0.1 - 2021-11-06
4
28
 
5
29
  ### Improvements
@@ -7,7 +31,8 @@
7
31
  * macOS: Don't require specifying `PKG_CONFIG_PATH` on install such
8
32
  as `PKG_CONFIG_PATH=$(brew --prefix openssl)/lib/pkgconfig gem
9
33
  install red-arrow-duckdb`.
10
- [GitHub#1][Reported by Konstantin Ilchenko]
34
+ [GitHub#1](https://github.com/red-data-tools/red-arrow-duckdb/issues/1)
35
+ [Reported by Konstantin Ilchenko]
11
36
 
12
37
  ### Thanks
13
38
 
@@ -24,7 +24,8 @@
24
24
  #include <duckdb.hpp>
25
25
  #ifndef DUCKDB_AMALGAMATION
26
26
  # include <duckdb.h>
27
- # include <duckdb/common/arrow_wrapper.hpp>
27
+ # include <duckdb/common/arrow/arrow_wrapper.hpp>
28
+ # include <duckdb/function/table/arrow.hpp>
28
29
  # include <duckdb/function/table_function.hpp>
29
30
  # include <duckdb/main/connection.hpp>
30
31
  # include <duckdb/planner/filter/conjunction_filter.hpp>
@@ -35,6 +36,22 @@
35
36
  #include "arrow-duckdb-registration.hpp"
36
37
 
37
38
  namespace {
39
+ std::shared_ptr<arrow::Scalar>
40
+ convert_constant_timestamp(duckdb::Value &value, arrow::TimeUnit::type unit)
41
+ {
42
+ auto scalar_result =
43
+ arrow::MakeScalar(arrow::timestamp(unit), value.GetValue<int64_t>());
44
+ if (!scalar_result.ok()) {
45
+ throw duckdb::InvalidInputException(
46
+ "[arrow][filter][pushdown][%s] "
47
+ "failed to convert to Apache Arrow scalar: %s: <%s>",
48
+ value.type().ToString(),
49
+ scalar_result.status().ToString(),
50
+ value.ToString());
51
+ }
52
+ return *scalar_result;
53
+ }
54
+
38
55
  std::shared_ptr<arrow::Scalar>
39
56
  convert_constant(duckdb::Value &value)
40
57
  {
@@ -55,9 +72,14 @@ namespace {
55
72
  // return arrow::MakeScalar(arrow::date32(), value.GetValue<int32_t>());
56
73
  // case duckdb::LogicalTypeId::TIME:
57
74
  // return arrow::MakeScalar(arrow::time64(), value.GetValue<int64_t>());
58
- // case duckdb::LogicalTypeId::TIMESTAMP:
59
- // return arrow::MakeScalar(arrow::timestamp(),
60
- // value.GetValue<int64_t>());
75
+ case duckdb::LogicalTypeId::TIMESTAMP_SEC:
76
+ return convert_constant_timestamp(value, arrow::TimeUnit::SECOND);
77
+ case duckdb::LogicalTypeId::TIMESTAMP_MS:
78
+ return convert_constant_timestamp(value, arrow::TimeUnit::MILLI);
79
+ case duckdb::LogicalTypeId::TIMESTAMP:
80
+ return convert_constant_timestamp(value, arrow::TimeUnit::MICRO);
81
+ case duckdb::LogicalTypeId::TIMESTAMP_NS:
82
+ return convert_constant_timestamp(value, arrow::TimeUnit::NANO);
61
83
  case duckdb::LogicalTypeId::UTINYINT:
62
84
  return arrow::MakeScalar(value.GetValue<uint8_t>());
63
85
  case duckdb::LogicalTypeId::USMALLINT:
@@ -75,7 +97,7 @@ namespace {
75
97
  // case LogicalTypeId::DECIMAL:
76
98
  default:
77
99
  throw duckdb::NotImplementedException(
78
- "[arrow][filter][pushdown] not implemented value type: %s",
100
+ "[arrow][filter][pushdown][%s] not implemented value type",
79
101
  value.type().ToString());
80
102
  }
81
103
  }
@@ -141,14 +163,11 @@ namespace {
141
163
  }
142
164
 
143
165
  arrow::compute::Expression
144
- convert_filters(std::unordered_map<
145
- idx_t,
146
- std::unique_ptr<duckdb::TableFilter>
147
- > &filters,
166
+ convert_filters(duckdb::TableFilterSet *filter_set,
148
167
  std::unordered_map<idx_t, std::string> &column_names)
149
168
  {
150
169
  std::vector<arrow::compute::Expression> expressions;
151
- for (auto it = filters.begin(); it != filters.end(); ++it) {
170
+ for (auto it = filter_set->filters.begin(); it != filter_set->filters.end(); ++it) {
152
171
  expressions.emplace_back(
153
172
  std::move(convert_filter(it->second.get(), column_names[it->first])));
154
173
  }
@@ -157,11 +176,7 @@ namespace {
157
176
 
158
177
  arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
159
178
  arrow_table_produce_internal(uintptr_t data,
160
- std::pair<
161
- std::unordered_map<idx_t, std::string>,
162
- std::vector<std::string>
163
- > &project_columns,
164
- duckdb::TableFilterCollection *filters)
179
+ duckdb::ArrowStreamParameters &parameters)
165
180
  {
166
181
  auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
167
182
  auto arrow_table = garrow_table_get_raw(garrow_table);
@@ -169,20 +184,22 @@ namespace {
169
184
  std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
170
185
  ARROW_ASSIGN_OR_RAISE(auto scanner_builder, dataset->NewScan());
171
186
  bool have_filter =
172
- filters &&
173
- filters->table_filters &&
174
- !filters->table_filters->filters.empty();
187
+ parameters.filters &&
188
+ !parameters.filters->filters.empty();
175
189
  if (have_filter) {
176
190
  ARROW_RETURN_NOT_OK(
177
- scanner_builder->Filter(convert_filters(filters->table_filters->filters,
178
- project_columns.first)));
191
+ scanner_builder->Filter(
192
+ convert_filters(parameters.filters,
193
+ parameters.projected_columns.projection_map)));
179
194
  }
180
- if (!project_columns.second.empty()) {
181
- ARROW_RETURN_NOT_OK(scanner_builder->Project(project_columns.second));
195
+ if (!parameters.projected_columns.columns.empty()) {
196
+ ARROW_RETURN_NOT_OK(
197
+ scanner_builder->Project(
198
+ parameters.projected_columns.columns));
182
199
  }
183
200
  ARROW_ASSIGN_OR_RAISE(auto scanner, scanner_builder->Finish());
184
201
  ARROW_ASSIGN_OR_RAISE(auto reader, scanner->ToRecordBatchReader());
185
- auto stream_wrapper = duckdb::make_unique<duckdb::ArrowArrayStreamWrapper>();
202
+ auto stream_wrapper = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
186
203
  ARROW_RETURN_NOT_OK(
187
204
  arrow::ExportRecordBatchReader(reader,
188
205
  &(stream_wrapper->arrow_array_stream)));
@@ -191,14 +208,9 @@ namespace {
191
208
 
192
209
  std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
193
210
  arrow_table_produce(uintptr_t data,
194
- std::pair<
195
- std::unordered_map<idx_t, std::string>,
196
- std::vector<std::string>
197
- > &project_columns,
198
- duckdb::TableFilterCollection *filters)
211
+ duckdb::ArrowStreamParameters &parameters)
199
212
  {
200
- auto stream_wrapper_result =
201
- arrow_table_produce_internal(data, project_columns, filters);
213
+ auto stream_wrapper_result = arrow_table_produce_internal(data, parameters);
202
214
  if (!stream_wrapper_result.ok()) {
203
215
  throw std::runtime_error(
204
216
  std::string("[arrow][produce] failed to produce: ") +
@@ -206,6 +218,20 @@ namespace {
206
218
  }
207
219
  return std::move(*stream_wrapper_result);
208
220
  }
221
+
222
+ void
223
+ arrow_table_get_schema(uintptr_t data, duckdb::ArrowSchemaWrapper &schema)
224
+ {
225
+ auto garrow_table = GARROW_TABLE(reinterpret_cast<gpointer>(data));
226
+ auto arrow_table = garrow_table_get_raw(garrow_table);
227
+ auto export_schema_status = arrow::ExportSchema(*(arrow_table->schema()),
228
+ reinterpret_cast<ArrowSchema *>(&schema));
229
+ if (!export_schema_status.ok()) {
230
+ throw std::runtime_error(
231
+ std::string("[arrow][get_schema] failed to export schema: ") +
232
+ export_schema_status.ToString());
233
+ }
234
+ }
209
235
  }
210
236
 
211
237
  namespace arrow_duckdb {
@@ -224,14 +250,13 @@ namespace arrow_duckdb {
224
250
  {
225
251
  auto c_name = StringValueCStr(name);
226
252
  auto garrow_table = RVAL2GOBJ(arrow_table);
227
- const idx_t rows_per_tuple = 1000000;
228
253
  reinterpret_cast<duckdb::Connection *>(connection)
229
254
  ->TableFunction(
230
255
  "arrow_scan",
231
256
  {
232
257
  duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(garrow_table)),
233
258
  duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_produce)),
234
- duckdb::Value::UBIGINT(rows_per_tuple)
259
+ duckdb::Value::POINTER(reinterpret_cast<uintptr_t>(arrow_table_get_schema)),
235
260
  })
236
261
  ->CreateView(c_name, true, true);
237
262
  }
@@ -20,7 +20,9 @@
20
20
 
21
21
  #include <rbgobject.h>
22
22
 
23
+ extern "C" {
23
24
  #include <ruby-duckdb.h>
25
+ }
24
26
 
25
27
  #include "arrow-duckdb-registration.hpp"
26
28
 
@@ -99,7 +101,7 @@ namespace {
99
101
  result_ensure_gschema(Result *result)
100
102
  {
101
103
  ArrowSchema c_abi_schema;
102
- duckdb_arrow_schema schema = &c_abi_schema;
104
+ auto schema = reinterpret_cast<duckdb_arrow_schema>(&c_abi_schema);
103
105
  auto state = duckdb_query_arrow_schema(result->arrow, &schema);
104
106
  if (state == DuckDBError) {
105
107
  free(result->error_message);
@@ -121,7 +123,7 @@ namespace {
121
123
  result_fetch_internal(VALUE self, Result *result)
122
124
  {
123
125
  ArrowArray c_abi_array = {};
124
- duckdb_arrow_array array = &c_abi_array;
126
+ auto array = reinterpret_cast<duckdb_arrow_array>(&c_abi_array);
125
127
  auto state = duckdb_query_arrow_array(result->arrow, &array);
126
128
  if (state == DuckDBError) {
127
129
  free(result->error_message);
@@ -219,8 +221,7 @@ namespace {
219
221
  VALUE
220
222
  query_sql_arrow(VALUE self, VALUE sql)
221
223
  {
222
- rubyDuckDBConnection *ctx;
223
- Data_Get_Struct(self, rubyDuckDBConnection, ctx);
224
+ auto ctx = get_struct_connection(self);
224
225
 
225
226
  if (!(ctx->con)) {
226
227
  rb_raise(eDuckDBError, "Database connection closed");
@@ -253,8 +254,7 @@ namespace {
253
254
  VALUE
254
255
  query_unregister_arrow(VALUE self, VALUE name)
255
256
  {
256
- rubyDuckDBConnection *ctx;
257
- Data_Get_Struct(self, rubyDuckDBConnection, ctx);
257
+ auto ctx = get_struct_connection(self);
258
258
 
259
259
  if (!(ctx->con)) {
260
260
  rb_raise(eDuckDBError, "Database connection closed");
@@ -293,8 +293,7 @@ namespace {
293
293
  VALUE
294
294
  query_register_arrow(VALUE self, VALUE name, VALUE arrow_table)
295
295
  {
296
- rubyDuckDBConnection *ctx;
297
- Data_Get_Struct(self, rubyDuckDBConnection, ctx);
296
+ auto ctx = get_struct_connection(self);
298
297
 
299
298
  if (!(ctx->con)) {
300
299
  rb_raise(eDuckDBError, "Database connection closed");
@@ -329,8 +328,7 @@ namespace {
329
328
  VALUE
330
329
  prepared_statement_execute_arrow(VALUE self)
331
330
  {
332
- rubyDuckDBPreparedStatement *ctx;
333
- Data_Get_Struct(self, rubyDuckDBPreparedStatement, ctx);
331
+ auto ctx = get_struct_prepared_statement(self);
334
332
 
335
333
  ID id_new;
336
334
  CONST_ID(id_new, "new");
@@ -16,7 +16,7 @@ require "extpp"
16
16
  require "mkmf-gnome"
17
17
  require "native-package-installer"
18
18
 
19
- checking_for(checking_message("Homebrew")) do
19
+ homebrew = checking_for(checking_message("Homebrew")) do
20
20
  case NativePackageInstaller::Platform.detect
21
21
  when NativePackageInstaller::Platform::Homebrew
22
22
  openssl_prefix = `brew --prefix openssl`.chomp
@@ -40,6 +40,10 @@ unless have_library("duckdb")
40
40
  install_missing_native_package(debian: "libduckdb-dev",
41
41
  redhat: "duckdb-devel",
42
42
  homebrew: "duckdb") or exit(false)
43
+ if homebrew
44
+ $INCFLAGS << " -I" << File.join(`brew --prefix duckdb`.chomp, "include")
45
+ $LIBPATH |= [File.join(`brew --prefix duckdb`.chomp, "lib")]
46
+ end
43
47
  have_library("duckdb") or exit(false)
44
48
  end
45
49
 
@@ -1,4 +1,4 @@
1
- # Copyright 2021 Sutou Kouhei <kou@clear-code.com>
1
+ # Copyright 2021-2022 Sutou Kouhei <kou@clear-code.com>
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -13,5 +13,5 @@
13
13
  # limitations under the License.
14
14
 
15
15
  module ArrowDuckDB
16
- VERSION = "1.0.1"
16
+ VERSION = "1.0.3"
17
17
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-arrow-duckdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sutou Kouhei
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2021-11-05 00:00:00.000000000 Z
10
+ date: 2024-10-10 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: duckdb
@@ -104,7 +103,6 @@ homepage: https://github.com/red-data-tools/red-arrow-duckdb
104
103
  licenses:
105
104
  - Apache-2.0
106
105
  metadata: {}
107
- post_install_message:
108
106
  rdoc_options: []
109
107
  require_paths:
110
108
  - lib
@@ -119,8 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
117
  - !ruby/object:Gem::Version
120
118
  version: '0'
121
119
  requirements: []
122
- rubygems_version: 3.3.0.dev
123
- signing_key:
120
+ rubygems_version: 3.6.0.dev
124
121
  specification_version: 4
125
122
  summary: Red Arrow DuckDB is a library that provides Apache Arrow support to ruby-duckdb.
126
123
  test_files: []