duckdb 0.9.1.2 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b492788be53be66a956ad49e4cd8a82ffa6273c12f037b2c0abd51253321802f
4
- data.tar.gz: 50571029b1d1160a7abb6ac223bd4cda4f3791b29d26cdebac6b795f160a2a14
3
+ metadata.gz: afbb8627bfcb9764d6928d64558b55c18b26932b1ad50946cfe777169b6986f9
4
+ data.tar.gz: b560803df1d4b93d1e1b9f5c56b55f779c34095780592b9cdae51f7d4da2c0ec
5
5
  SHA512:
6
- metadata.gz: d80355e181599217574191f3acba24d6dbcb37e4a73b774689590f42fc9b92032d4a4285962ee65100d3a3186825b29f09066d9fd0f8f24ce46aaec85cba4701
7
- data.tar.gz: b126f7e0057c77cae338f7c6b76d4b775c52b06f84936ebd6208a3357148ababa8c8c3aaca727dacf4b7ed05b5f5da2bedc74d8b1d46f170ddc418d4c9e125fa
6
+ metadata.gz: 4c16acf784261f874501381aa9c764bc461d56fd35b066bae88c6cf0b4de654a867a6d309641cf0cf39d60854d268ce610fd3997534adcd11f322e7e4cd76401
7
+ data.tar.gz: 24d26d68b843c27ca306980e0d1acdfed2c6f24bb573cb89ba684879c4d05d43256d2335085e82fb1c352b7cdffcd016f2c2dadf3d924fc747cc4c450b142693
data/.gitattributes ADDED
@@ -0,0 +1 @@
1
+ *.rb diff=ruby
@@ -1,9 +1,9 @@
1
1
  name: MacOS
2
2
 
3
3
  on:
4
- push:
5
- branches:
6
- - main
4
+ # push:
5
+ # branches:
6
+ # - main
7
7
  pull_request:
8
8
  types:
9
9
  - opened
@@ -15,8 +15,8 @@ jobs:
15
15
  runs-on: macos-latest
16
16
  strategy:
17
17
  matrix:
18
- ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview2', 'head']
19
- duckdb: ['0.9.1', '0.8.1']
18
+ ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview3', 'head']
19
+ duckdb: ['0.9.2', '0.8.1']
20
20
 
21
21
  steps:
22
22
  - uses: actions/checkout@v3
@@ -59,7 +59,7 @@ jobs:
59
59
 
60
60
  - name: run test with Ruby ${{ matrix.ruby }}
61
61
  run: |
62
- rake test
62
+ env RUBYOPT=-W:deprecated rake test
63
63
 
64
64
  post-test:
65
65
  name: All tests passed on macos
@@ -1,9 +1,9 @@
1
1
  name: Ubuntu
2
2
 
3
3
  on:
4
- push:
5
- branches:
6
- - main
4
+ # push:
5
+ # branches:
6
+ # - main
7
7
  pull_request:
8
8
  types:
9
9
  - opened
@@ -15,8 +15,8 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  strategy:
17
17
  matrix:
18
- ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview2', 'head']
19
- duckdb: ['0.9.1', '0.8.1']
18
+ ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview3', 'head']
19
+ duckdb: ['0.9.2', '0.8.1']
20
20
 
21
21
  steps:
22
22
  - uses: actions/checkout@v3
@@ -57,7 +57,7 @@ jobs:
57
57
  env:
58
58
  DUCKDB_VERSION: ${{ matrix.duckdb }}
59
59
  run: |
60
- rake test
60
+ env RUBYOPT=-W:deprecated rake test
61
61
 
62
62
  post-test:
63
63
  name: All tests passed on Ubuntu
@@ -1,9 +1,9 @@
1
1
  name: Windows
2
2
 
3
3
  on:
4
- push:
5
- branches:
6
- - main
4
+ # push:
5
+ # branches:
6
+ # - main
7
7
  pull_request:
8
8
  types:
9
9
  - opened
@@ -16,7 +16,7 @@ jobs:
16
16
  strategy:
17
17
  matrix:
18
18
  ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', 'ucrt', 'mingw', 'mswin', 'head']
19
- duckdb: ['0.9.1', '0.8.1']
19
+ duckdb: ['0.9.2', '0.8.1']
20
20
 
21
21
  steps:
22
22
  - uses: actions/checkout@v3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # ChangeLog
2
2
 
3
+ # 0.9.2
4
+ - add DuckDB::Connection#async_query_stream
5
+ - DuckDB::PendingResult.new accepts an optional second argument. If the second argument is
6
+ true, PendingResult#execute_pending returns a streaming DuckDB::Result object.
7
+ - add DuckDB::PreparedStatement#pending_prepared_stream
8
+ - add DuckDB::Result#streaming?.
9
+
3
10
  # 0.9.1.2
4
11
  - add DuckDB::Connection#interrupt, DuckDB::Connection#query_progress
5
12
  - add DuckDB::Connection#async_query, alias method async_execute.
data/Dockerfile CHANGED
@@ -1,7 +1,7 @@
1
1
  ARG RUBY_VERSION=3.2.2
2
2
  FROM ruby:${RUBY_VERSION}
3
3
 
4
- ARG DUCKDB_VERSION=0.9.1
4
+ ARG DUCKDB_VERSION=0.9.2
5
5
 
6
6
  RUN apt update -qq && \
7
7
  apt install -y build-essential curl git wget
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- duckdb (0.9.1.2)
4
+ duckdb (0.9.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -9,16 +9,16 @@ GEM
9
9
  benchmark-ips (2.12.0)
10
10
  mini_portile2 (2.8.5)
11
11
  minitest (5.20.0)
12
- nokogiri (1.15.4)
12
+ nokogiri (1.15.5)
13
13
  mini_portile2 (~> 2.8.2)
14
14
  racc (~> 1.4)
15
- nokogiri (1.15.4-x86_64-linux)
15
+ nokogiri (1.15.5-x86_64-linux)
16
16
  racc (~> 1.4)
17
17
  racc (1.7.3)
18
18
  rake (13.1.0)
19
19
  rake-compiler (1.2.5)
20
20
  rake
21
- ruby_memcheck (2.2.0)
21
+ ruby_memcheck (2.2.1)
22
22
  nokogiri
23
23
  stackprof (0.2.25)
24
24
 
data/README.md CHANGED
@@ -104,10 +104,25 @@ con.query('SELECT * FROM users WHERE name = ? AND email = ?', 'Alice', 'alice@ex
104
104
  con.query('SELECT * FROM users WHERE name = $name AND email = $email', name: 'Alice', email: 'alice@example.com')
105
105
  ```
106
106
 
107
+ ### using async query
108
+
109
+ You can run a query asynchronously and stream its result.
110
+
111
+ ```ruby
112
+ DuckDB::Result.use_chunk_each = true # must be true.
113
+ ...
114
+
115
+ pending_result = con.async_query_stream('SLOW QUERY')
116
+ pending_result.execute_task while pending_result.state == :not_ready
117
+
118
+ result = pending_result.execute_pending
119
+ result.each.first
120
+ ```
121
+
122
+ Here is [the benchmark](./benchmark/async_query.rb).
107
123
 
108
124
  ### using BLOB column
109
125
 
110
- BLOB is available with DuckDB v0.2.5 or later.
111
126
  Use `DuckDB::Blob.new` or use String#force_encoding(Encoding::BINARY)
112
127
 
113
128
  ```ruby
@@ -119,6 +134,7 @@ DuckDB::Database.open do |db|
119
134
  stmt = DuckDB::PreparedStatement.new(con, 'INSERT INTO blob_table VALUES ($1)')
120
135
 
121
136
  stmt.bind(1, DuckDB::Blob.new("\0\1\2\3\4\5"))
137
+ # or
122
138
  # stmt.bind(1, "\0\1\2\3\4\5".force_encoding(Encoding::BINARY))
123
139
  stmt.execute
124
140
 
@@ -0,0 +1,90 @@
1
+ require 'bundler/setup'
2
+ require 'duckdb'
3
+ require 'benchmark/ips'
4
+
5
+
6
+ DuckDB::Result.use_chunk_each = true
7
+ DuckDB::Database.open do |db|
8
+ db.connect do |con|
9
+ con.query('SET threads=1')
10
+ con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(100000)')
11
+ con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(100000)')
12
+ query_sql = 'SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)'
13
+ print <<~END_OF_HEAD
14
+
15
+ Benchmark: Get first record ======================================
16
+ END_OF_HEAD
17
+
18
+ Benchmark.ips do |x|
19
+ x.report('async_query') do
20
+ pending_result = con.async_query(query_sql)
21
+
22
+ pending_result.execute_task while pending_result.state == :not_ready
23
+ result = pending_result.execute_pending
24
+ result.each.first
25
+ end
26
+ x.report('query') do
27
+ result = con.query(query_sql)
28
+ result.each.first
29
+ end
30
+ x.report('async_query_stream') do
31
+ pending_result = con.async_query_stream(query_sql)
32
+
33
+ pending_result.execute_task while pending_result.state == :not_ready
34
+ result = pending_result.execute_pending
35
+ result.each.first
36
+ end
37
+ end
38
+
39
+ print <<~END_OF_HEAD
40
+
41
+
42
+ Benchmark: Get all records ======================================
43
+ END_OF_HEAD
44
+
45
+ Benchmark.ips do |x|
46
+ x.report('async_query') do
47
+ pending_result = con.async_query(query_sql)
48
+
49
+ pending_result.execute_task while pending_result.state == :not_ready
50
+ result = pending_result.execute_pending
51
+ result.each.to_a
52
+ end
53
+ x.report('query') do
54
+ result = con.query(query_sql)
55
+ result.each.to_a
56
+ end
57
+ x.report('async_query_stream') do
58
+ pending_result = con.async_query_stream(query_sql)
59
+
60
+ pending_result.execute_task while pending_result.state == :not_ready
61
+ result = pending_result.execute_pending
62
+ result.each.to_a
63
+ end
64
+ end
65
+ end
66
+ end
67
+
68
+ __END__
69
+
70
+ results:
71
+ Benchmark: Get first record ======================================
72
+ Warming up --------------------------------------
73
+ async_query 70.000 i/100ms
74
+ query 88.000 i/100ms
75
+ async_query_stream 188.000 i/100ms
76
+ Calculating -------------------------------------
77
+ async_query 847.191 (± 4.6%) i/s - 4.270k in 5.051650s
78
+ query 850.509 (± 3.8%) i/s - 4.312k in 5.078167s
79
+ async_query_stream 1.757k (± 7.3%) i/s - 8.836k in 5.057142s
80
+
81
+
82
+ Benchmark: Get all records ======================================
83
+ Warming up --------------------------------------
84
+ async_query 40.000 i/100ms
85
+ query 40.000 i/100ms
86
+ async_query_stream 39.000 i/100ms
87
+ Calculating -------------------------------------
88
+ async_query 402.567 (± 0.5%) i/s - 2.040k in 5.067639s
89
+ query 406.632 (± 0.7%) i/s - 2.040k in 5.017079s
90
+ async_query_stream 395.532 (± 0.8%) i/s - 1.989k in 5.028955s
@@ -1,31 +1,64 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
- def duckdb_library_available?(func)
4
- header = find_header('duckdb.h') || find_header('duckdb.h', '/opt/homebrew/include')
5
- library = have_func(func, 'duckdb.h') || find_library('duckdb', func, '/opt/homebrew/opt/duckdb/lib')
6
- header && library
5
+ DUCKDB_REQUIRED_VERSION = '0.8.0'
6
+
7
+ def check_duckdb_header(header, version)
8
+ found = find_header(
9
+ header,
10
+ '/opt/homebrew/include',
11
+ '/opt/homebrew/opt/duckdb/include',
12
+ '/opt/local/include'
13
+ )
14
+ return if found
15
+
16
+ msg = "#{header} is not found. Install #{header} of duckdb >= #{version}."
17
+ print_message(msg)
18
+ raise msg
7
19
  end
8
20
 
9
- def check_duckdb_library(func, version)
10
- return if duckdb_library_available?(func)
21
+ def check_duckdb_library(library, func, version)
22
+ found = find_library(
23
+ library,
24
+ func,
25
+ '/opt/homebrew/lib',
26
+ '/opt/homebrew/opt/duckdb/lib',
27
+ '/opt/local/lib'
28
+ )
29
+ return if found
11
30
 
12
- msg = "duckdb >= #{version} is not found. Install duckdb >= #{version} library and header file."
13
- puts ''
14
- puts '*' * 80
15
- puts msg
16
- puts '*' * 80
17
- puts ''
31
+ library_name = duckdb_library_name(library)
32
+ msg = "#{library_name} is not found. Install #{library_name} of duckdb >= #{version}."
33
+ print_message(msg)
18
34
  raise msg
19
35
  end
20
36
 
37
+ def duckdb_library_name(library)
38
+ "lib#{library}.(so|dylib|dll)"
39
+ end
40
+
41
+ def print_message(msg)
42
+ print <<~END_OF_MESSAGE
43
+
44
+ #{'*' * 80}
45
+ #{msg}
46
+ #{'*' * 80}
47
+
48
+ END_OF_MESSAGE
49
+ end
50
+
21
51
  dir_config('duckdb')
22
52
 
53
+ check_duckdb_header('duckdb.h', DUCKDB_REQUIRED_VERSION)
54
+
23
55
  # check duckdb >= 0.8.0
24
- check_duckdb_library('duckdb_string_is_inlined', '0.8.0')
56
+ check_duckdb_library('duckdb', 'duckdb_string_is_inlined', DUCKDB_REQUIRED_VERSION)
25
57
 
26
58
  # check duckdb >= 0.9.0
27
59
  have_func('duckdb_bind_parameter_index', 'duckdb.h')
28
60
 
61
+ # duckdb_parameter_name is not found on Windows.
29
62
  have_func('duckdb_parameter_name', 'duckdb.h')
30
63
 
31
64
  create_makefile('duckdb/duckdb_native')
@@ -5,7 +5,7 @@ static VALUE cDuckDBPendingResult;
5
5
  static void deallocate(void *ctx);
6
6
  static VALUE allocate(VALUE klass);
7
7
  static size_t memsize(const void *p);
8
- static VALUE duckdb_pending_result_initialize(VALUE self, VALUE oDuckDBPreparedStatement);
8
+ static VALUE duckdb_pending_result_initialize(int argc, VALUE *args, VALUE self);
9
9
  static VALUE duckdb_pending_result_execute_task(VALUE self);
10
10
  static VALUE duckdb_pending_result_execute_pending(VALUE self);
11
11
 
@@ -38,11 +38,27 @@ static size_t memsize(const void *p) {
38
38
  return sizeof(rubyDuckDBPendingResult);
39
39
  }
40
40
 
41
- static VALUE duckdb_pending_result_initialize(VALUE self, VALUE oDuckDBPreparedStatement) {
41
+ static VALUE duckdb_pending_result_initialize(int argc, VALUE *argv, VALUE self) {
42
+ VALUE oDuckDBPreparedStatement;
43
+ VALUE streaming_p = Qfalse;
44
+ duckdb_state state;
45
+
46
+ rb_scan_args(argc, argv, "11", &oDuckDBPreparedStatement, &streaming_p);
47
+
48
+ if (rb_obj_is_kind_of(oDuckDBPreparedStatement, cDuckDBPreparedStatement) != Qtrue) {
49
+ rb_raise(rb_eTypeError, "1st argument must be DuckDB::PreparedStatement");
50
+ }
51
+
42
52
  rubyDuckDBPendingResult *ctx = get_struct_pending_result(self);
43
53
  rubyDuckDBPreparedStatement *stmt = get_struct_prepared_statement(oDuckDBPreparedStatement);
44
54
 
45
- if (duckdb_pending_prepared(stmt->prepared_statement, &(ctx->pending_result)) == DuckDBError) {
55
+ if (!NIL_P(streaming_p) && streaming_p == Qtrue) {
56
+ state = duckdb_pending_prepared_streaming(stmt->prepared_statement, &(ctx->pending_result));
57
+ } else {
58
+ state = duckdb_pending_prepared(stmt->prepared_statement, &(ctx->pending_result));
59
+ }
60
+
61
+ if (state == DuckDBError) {
46
62
  rb_raise(eDuckDBError, "%s", duckdb_pending_error(ctx->pending_result));
47
63
  }
48
64
  return self;
@@ -112,7 +128,7 @@ void rbduckdb_init_duckdb_pending_result(void) {
112
128
  cDuckDBPendingResult = rb_define_class_under(mDuckDB, "PendingResult", rb_cObject);
113
129
  rb_define_alloc_func(cDuckDBPendingResult, allocate);
114
130
 
115
- rb_define_method(cDuckDBPendingResult, "initialize", duckdb_pending_result_initialize, 1);
131
+ rb_define_method(cDuckDBPendingResult, "initialize", duckdb_pending_result_initialize, -1);
116
132
  rb_define_method(cDuckDBPendingResult, "execute_task", duckdb_pending_result_execute_task, 0);
117
133
  rb_define_method(cDuckDBPendingResult, "execute_pending", duckdb_pending_result_execute_pending, 0);
118
134
 
@@ -1,6 +1,6 @@
1
1
  #include "ruby-duckdb.h"
2
2
 
3
- static VALUE cDuckDBPreparedStatement;
3
+ VALUE cDuckDBPreparedStatement;
4
4
 
5
5
  static void deallocate(void *ctx);
6
6
  static VALUE allocate(VALUE klass);
data/ext/duckdb/result.c CHANGED
@@ -19,6 +19,11 @@ static VALUE duckdb_result_column_count(VALUE oDuckDBResult);
19
19
  static VALUE duckdb_result_row_count(VALUE oDuckDBResult);
20
20
  static VALUE duckdb_result_rows_changed(VALUE oDuckDBResult);
21
21
  static VALUE duckdb_result_columns(VALUE oDuckDBResult);
22
+ static VALUE duckdb_result_streaming_p(VALUE oDuckDBResult);
23
+ static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult);
24
+
25
+ static VALUE duckdb_result__chunk_stream(VALUE oDuckDBResult);
26
+ static void yield_rows(duckdb_data_chunk chunk, idx_t col_count);
22
27
  static VALUE duckdb_result__column_type(VALUE oDuckDBResult, VALUE col_idx);
23
28
  static VALUE duckdb_result__is_null(VALUE oDuckDBResult, VALUE row_idx, VALUE col_idx);
24
29
  static VALUE duckdb_result__to_boolean(VALUE oDuckDBResult, VALUE row_idx, VALUE col_idx);
@@ -50,7 +55,6 @@ static VALUE vector_map(duckdb_logical_type ty, duckdb_vector vector, idx_t row_
50
55
  static VALUE vector_struct(duckdb_logical_type ty, duckdb_vector vector, idx_t row_idx);
51
56
  static VALUE vector_uuid(void* vector_data, idx_t row_idx);
52
57
  static VALUE vector_value(duckdb_vector vector, idx_t row_idx);
53
- static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult);
54
58
 
55
59
  static const rb_data_type_t result_data_type = {
56
60
  "DuckDB/Result",
@@ -235,6 +239,79 @@ static VALUE duckdb_result_columns(VALUE oDuckDBResult) {
235
239
  return ary;
236
240
  }
237
241
 
242
+ /*
243
+ * call-seq:
244
+ * result.streaming? -> Boolean
245
+ *
246
+ * Returns true if the result is streaming, otherwise false.
247
+ *
248
+ */
249
+ static VALUE duckdb_result_streaming_p(VALUE oDuckDBResult) {
250
+ rubyDuckDBResult *ctx;
251
+ TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
252
+ return duckdb_result_is_streaming(ctx->result) ? Qtrue : Qfalse;
253
+ }
254
+
255
+ static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult) {
256
+ rubyDuckDBResult *ctx;
257
+ idx_t col_count;
258
+ idx_t chunk_count;
259
+ idx_t chunk_idx;
260
+ duckdb_data_chunk chunk;
261
+
262
+ TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
263
+
264
+ col_count = duckdb_column_count(&(ctx->result));
265
+ chunk_count = duckdb_result_chunk_count(ctx->result);
266
+
267
+ RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
268
+
269
+ for (chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
270
+ chunk = duckdb_result_get_chunk(ctx->result, chunk_idx);
271
+ yield_rows(chunk, col_count);
272
+ duckdb_destroy_data_chunk(&chunk);
273
+ }
274
+ return Qnil;
275
+ }
276
+
277
+ static VALUE duckdb_result__chunk_stream(VALUE oDuckDBResult) {
278
+ rubyDuckDBResult *ctx;
279
+ duckdb_data_chunk chunk;
280
+ idx_t col_count;
281
+
282
+ TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
283
+
284
+ RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
285
+
286
+ col_count = duckdb_column_count(&(ctx->result));
287
+
288
+ while((chunk = duckdb_stream_fetch_chunk(ctx->result)) != NULL) {
289
+ yield_rows(chunk, col_count);
290
+ duckdb_destroy_data_chunk(&chunk);
291
+ }
292
+ return Qnil;
293
+ }
294
+
295
+ static void yield_rows(duckdb_data_chunk chunk, idx_t col_count) {
296
+ idx_t row_count;
297
+ idx_t row_idx;
298
+ idx_t col_idx;
299
+ duckdb_vector vector;
300
+ VALUE row;
301
+ VALUE val;
302
+
303
+ row_count = duckdb_data_chunk_get_size(chunk);
304
+ for (row_idx = 0; row_idx < row_count; row_idx++) {
305
+ row = rb_ary_new2(col_count);
306
+ for (col_idx = 0; col_idx < col_count; col_idx++) {
307
+ vector = duckdb_data_chunk_get_vector(chunk, col_idx);
308
+ val = vector_value(vector, row_idx);
309
+ rb_ary_store(row, col_idx, val);
310
+ }
311
+ rb_yield(row);
312
+ }
313
+ }
314
+
238
315
  static VALUE duckdb_result__column_type(VALUE oDuckDBResult, VALUE col_idx) {
239
316
  rubyDuckDBResult *ctx;
240
317
  TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
@@ -405,32 +482,32 @@ static VALUE vector_date(void *vector_data, idx_t row_idx) {
405
482
  duckdb_date_struct date = duckdb_from_date(((duckdb_date *) vector_data)[row_idx]);
406
483
 
407
484
  return rb_funcall(mDuckDBConverter, rb_intern("_to_date"), 3,
408
- INT2FIX(date.year),
409
- INT2FIX(date.month),
410
- INT2FIX(date.day)
411
- );
485
+ INT2FIX(date.year),
486
+ INT2FIX(date.month),
487
+ INT2FIX(date.day)
488
+ );
412
489
  }
413
490
 
414
491
  static VALUE vector_timestamp(void* vector_data, idx_t row_idx) {
415
492
  duckdb_timestamp_struct data = duckdb_from_timestamp(((duckdb_timestamp *)vector_data)[row_idx]);
416
493
  return rb_funcall(mDuckDBConverter, rb_intern("_to_time"), 7,
417
- INT2FIX(data.date.year),
418
- INT2FIX(data.date.month),
419
- INT2FIX(data.date.day),
420
- INT2FIX(data.time.hour),
421
- INT2FIX(data.time.min),
422
- INT2FIX(data.time.sec),
423
- INT2NUM(data.time.micros)
424
- );
494
+ INT2FIX(data.date.year),
495
+ INT2FIX(data.date.month),
496
+ INT2FIX(data.date.day),
497
+ INT2FIX(data.time.hour),
498
+ INT2FIX(data.time.min),
499
+ INT2FIX(data.time.sec),
500
+ INT2NUM(data.time.micros)
501
+ );
425
502
  }
426
503
 
427
504
  static VALUE vector_interval(void* vector_data, idx_t row_idx) {
428
505
  duckdb_interval data = ((duckdb_interval *)vector_data)[row_idx];
429
506
  return rb_funcall(mDuckDBConverter, rb_intern("_to_interval_from_vector"), 3,
430
- INT2NUM(data.months),
431
- INT2NUM(data.days),
432
- LL2NUM(data.micros)
433
- );
507
+ INT2NUM(data.months),
508
+ INT2NUM(data.days),
509
+ LL2NUM(data.micros)
510
+ );
434
511
  }
435
512
 
436
513
  static VALUE vector_blob(void* vector_data, idx_t row_idx) {
@@ -454,9 +531,9 @@ static VALUE vector_varchar(void* vector_data, idx_t row_idx) {
454
531
  static VALUE vector_hugeint(void* vector_data, idx_t row_idx) {
455
532
  duckdb_hugeint hugeint = ((duckdb_hugeint *)vector_data)[row_idx];
456
533
  return rb_funcall(mDuckDBConverter, rb_intern("_to_hugeint_from_vector"), 2,
457
- ULL2NUM(hugeint.lower),
458
- LL2NUM(hugeint.upper)
459
- );
534
+ ULL2NUM(hugeint.lower),
535
+ LL2NUM(hugeint.upper)
536
+ );
460
537
  }
461
538
 
462
539
  static VALUE vector_decimal(duckdb_logical_type ty, void* vector_data, idx_t row_idx) {
@@ -477,11 +554,11 @@ static VALUE vector_decimal(duckdb_logical_type ty, void* vector_data, idx_t row
477
554
  }
478
555
 
479
556
  return rb_funcall(mDuckDBConverter, rb_intern("_to_decimal_from_vector"), 4,
480
- INT2FIX(width),
481
- INT2FIX(scale),
482
- ULL2NUM(value.lower),
483
- LL2NUM(value.upper)
484
- );
557
+ INT2FIX(width),
558
+ INT2FIX(scale),
559
+ ULL2NUM(value.lower),
560
+ LL2NUM(value.upper)
561
+ );
485
562
  }
486
563
 
487
564
  static VALUE vector_enum(duckdb_logical_type ty, void* vector_data, idx_t row_idx) {
@@ -577,9 +654,9 @@ static VALUE vector_struct(duckdb_logical_type ty, duckdb_vector vector, idx_t r
577
654
  static VALUE vector_uuid(void* vector_data, idx_t row_idx) {
578
655
  duckdb_hugeint hugeint = ((duckdb_hugeint *)vector_data)[row_idx];
579
656
  return rb_funcall(mDuckDBConverter, rb_intern("_to_uuid_from_vector"), 2,
580
- ULL2NUM(hugeint.lower),
581
- LL2NUM(hugeint.upper)
582
- );
657
+ ULL2NUM(hugeint.lower),
658
+ LL2NUM(hugeint.upper)
659
+ );
583
660
  }
584
661
 
585
662
  static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
@@ -617,7 +694,7 @@ static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
617
694
  case DUCKDB_TYPE_BIGINT:
618
695
  obj = LL2NUM(((int64_t *) vector_data)[row_idx]);
619
696
  break;
620
- case DUCKDB_TYPE_UTINYINT:
697
+ case DUCKDB_TYPE_UTINYINT:
621
698
  obj = INT2FIX(((uint8_t *) vector_data)[row_idx]);
622
699
  break;
623
700
  case DUCKDB_TYPE_USMALLINT:
@@ -680,43 +757,6 @@ static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
680
757
  return obj;
681
758
  }
682
759
 
683
- static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult) {
684
- rubyDuckDBResult *ctx;
685
- VALUE row;
686
- idx_t col_count;
687
- idx_t row_count;
688
- idx_t chunk_count;
689
- idx_t col_idx;
690
- idx_t row_idx;
691
- idx_t chunk_idx;
692
- duckdb_data_chunk chunk;
693
- duckdb_vector vector;
694
- VALUE val;
695
-
696
- TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
697
-
698
- col_count = duckdb_column_count(&(ctx->result));
699
- chunk_count = duckdb_result_chunk_count(ctx->result);
700
-
701
- RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
702
-
703
- for (chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
704
- chunk = duckdb_result_get_chunk(ctx->result, chunk_idx);
705
- row_count = duckdb_data_chunk_get_size(chunk);
706
- for (row_idx = 0; row_idx < row_count; row_idx++) {
707
- row = rb_ary_new2(col_count);
708
- for (col_idx = 0; col_idx < col_count; col_idx++) {
709
- vector = duckdb_data_chunk_get_vector(chunk, col_idx);
710
- val = vector_value(vector, row_idx);
711
- rb_ary_store(row, col_idx, val);
712
- }
713
- rb_yield(row);
714
- }
715
- duckdb_destroy_data_chunk(&chunk);
716
- }
717
- return Qnil;
718
- }
719
-
720
760
  void rbduckdb_init_duckdb_result(void) {
721
761
  cDuckDBResult = rb_define_class_under(mDuckDB, "Result", rb_cObject);
722
762
  rb_define_alloc_func(cDuckDBResult, allocate);
@@ -725,6 +765,9 @@ void rbduckdb_init_duckdb_result(void) {
725
765
  rb_define_method(cDuckDBResult, "row_count", duckdb_result_row_count, 0);
726
766
  rb_define_method(cDuckDBResult, "rows_changed", duckdb_result_rows_changed, 0);
727
767
  rb_define_method(cDuckDBResult, "columns", duckdb_result_columns, 0);
768
+ rb_define_method(cDuckDBResult, "streaming?", duckdb_result_streaming_p, 0);
769
+ rb_define_method(cDuckDBResult, "chunk_each", duckdb_result_chunk_each, 0);
770
+ rb_define_private_method(cDuckDBResult, "_chunk_stream", duckdb_result__chunk_stream, 0);
728
771
  rb_define_private_method(cDuckDBResult, "_column_type", duckdb_result__column_type, 1);
729
772
  rb_define_private_method(cDuckDBResult, "_null?", duckdb_result__is_null, 2);
730
773
  rb_define_private_method(cDuckDBResult, "_to_boolean", duckdb_result__to_boolean, 2);
@@ -742,5 +785,4 @@ void rbduckdb_init_duckdb_result(void) {
742
785
  rb_define_private_method(cDuckDBResult, "_enum_internal_type", duckdb_result__enum_internal_type, 1);
743
786
  rb_define_private_method(cDuckDBResult, "_enum_dictionary_size", duckdb_result__enum_dictionary_size, 1);
744
787
  rb_define_private_method(cDuckDBResult, "_enum_dictionary_value", duckdb_result__enum_dictionary_value, 2);
745
- rb_define_method(cDuckDBResult, "chunk_each", duckdb_result_chunk_each, 0);
746
788
  }
@@ -29,5 +29,6 @@ extern VALUE cDuckDBBlob;
29
29
  extern VALUE cDuckDBConfig;
30
30
  extern VALUE eDuckDBError;
31
31
  extern VALUE mDuckDBConverter;
32
+ extern VALUE cDuckDBPreparedStatement;
32
33
 
33
34
  #endif
@@ -42,14 +42,12 @@ module DuckDB
42
42
  # require 'duckdb'
43
43
  # db = DuckDB::Database.open('duckdb_file')
44
44
  # con = db.connect
45
- # pending_result = con.async_query('SELECT * FROM users')
46
- # sql = 'SELECT * FROM users WHERE name = ? AND email = ?'
47
- # pending_result = con.async_query(sql, 'Dave', 'dave@example.com')
48
- #
49
- # # or You can use named parameter.
50
45
  #
51
46
  # sql = 'SELECT * FROM users WHERE name = $name AND email = $email'
52
47
  # pending_result = con.async_query(sql, name: 'Dave', email: 'dave@example.com')
48
+ # pending_result.execute_task while pending_result.state == :not_ready
49
+ # result = pending_result.execute_pending
50
+ # result.each.first
53
51
  #
54
52
  def async_query(sql, *args, **kwargs)
55
53
  stmt = PreparedStatement.new(self, sql)
@@ -57,6 +55,30 @@ module DuckDB
57
55
  stmt.pending_prepared
58
56
  end
59
57
 
58
+ #
59
+ # executes sql with args asynchronously and provides a streaming result.
60
+ # The first argument sql must be SQL string.
61
+ # The remaining arguments are the parameters of the SQL string.
62
+ # This method returns DuckDB::PendingResult object.
63
+ #
64
+ # require 'duckdb'
65
+ # DuckDB::Result.use_chunk_each = true # must be true
66
+ # db = DuckDB::Database.open('duckdb_file')
67
+ # con = db.connect
68
+ #
69
+ # sql = 'SELECT * FROM users WHERE name = $name AND email = $email'
70
+ # pending_result = con.async_query_stream(sql, name: 'Dave', email: 'dave@example.com')
71
+ #
72
+ # pending_result.execute_task while pending_result.state == :not_ready
73
+ # result = pending_result.execute_pending
74
+ # result.each.first
75
+ #
76
+ def async_query_stream(sql, *args, **kwargs)
77
+ stmt = PreparedStatement.new(self, sql)
78
+ stmt.bind_args(*args, **kwargs)
79
+ stmt.pending_prepared_stream
80
+ end
81
+
60
82
  #
61
83
  # connects DuckDB database
62
84
  # The first argument is DuckDB::Database object
@@ -24,6 +24,12 @@ module DuckDB
24
24
  PendingResult.new(self)
25
25
  end
26
26
 
27
+ def pending_prepared_stream
28
+ raise DuckDB::Error, 'DuckDB::Result.use_chunk_each must be true.' unless DuckDB::Result.use_chunk_each?
29
+
30
+ PendingResult.new(self, true)
31
+ end
32
+
27
33
  # binds all parameters with SQL prepared statement.
28
34
  #
29
35
  # require 'duckdb'
data/lib/duckdb/result.rb CHANGED
@@ -58,9 +58,15 @@ module DuckDB
58
58
 
59
59
  def each
60
60
  if self.class.use_chunk_each?
61
- return chunk_each unless block_given?
61
+ if streaming?
62
+ return _chunk_stream unless block_given?
62
63
 
63
- chunk_each { |row| yield row }
64
+ _chunk_stream { |row| yield row }
65
+ else
66
+ return chunk_each unless block_given?
67
+
68
+ chunk_each { |row| yield row }
69
+ end
64
70
  else
65
71
  warn('this `each` behavior will be deprecated in the future. set `DuckDB::Result.use_chunk_each = true` to use new `each` behavior.')
66
72
  return to_enum { row_size } unless block_given?
@@ -3,5 +3,5 @@
3
3
  module DuckDB
4
4
  # The version string of ruby-duckdb.
5
5
  # Currently, ruby-duckdb is NOT semantic versioning.
6
- VERSION = '0.9.1.2'
6
+ VERSION = '0.9.2'
7
7
  end
@@ -0,0 +1,24 @@
1
+ require 'duckdb'
2
+
3
+ DuckDB::Result.use_chunk_each = true
4
+ DuckDB::Database.open do |db|
5
+ db.connect do |con|
6
+ con.query('SET threads=1')
7
+ con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(10000)')
8
+ con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(10000)')
9
+ # con.query('SET ENABLE_PROGRESS_BAR=true')
10
+ # con.query('SET ENABLE_PROGRESS_BAR_PRINT=false')
11
+ pending_result = con.async_query('SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)')
12
+
13
+ # con.interrupt
14
+ while pending_result.state == :not_ready
15
+ pending_result.execute_task
16
+ print '.'
17
+ $stdout.flush
18
+ sleep 0.01
19
+ end
20
+ result = pending_result.execute_pending
21
+ puts
22
+ p result.each.first
23
+ end
24
+ end
@@ -0,0 +1,24 @@
1
+ require 'duckdb'
2
+
3
+ DuckDB::Result.use_chunk_each = true
4
+ DuckDB::Database.open do |db|
5
+ db.connect do |con|
6
+ con.query('SET threads=1')
7
+ con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(10000)')
8
+ con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(10000)')
9
+ # con.query('SET ENABLE_PROGRESS_BAR=true')
10
+ # con.query('SET ENABLE_PROGRESS_BAR_PRINT=false')
11
+ pending_result = con.async_query_stream('SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)')
12
+
13
+ # con.interrupt
14
+ while pending_result.state == :not_ready
15
+ pending_result.execute_task
16
+ print '.'
17
+ $stdout.flush
18
+ sleep 0.01
19
+ end
20
+ result = pending_result.execute_pending
21
+ puts
22
+ p result.each.first
23
+ end
24
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: duckdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.1.2
4
+ version: 0.9.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masaki Suketa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-05 00:00:00.000000000 Z
11
+ date: 2023-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -75,6 +75,7 @@ extensions:
75
75
  - ext/duckdb/extconf.rb
76
76
  extra_rdoc_files: []
77
77
  files:
78
+ - ".gitattributes"
78
79
  - ".github/FUNDING.yml"
79
80
  - ".github/workflows/test_on_macos.yml"
80
81
  - ".github/workflows/test_on_ubuntu.yml"
@@ -88,6 +89,7 @@ files:
88
89
  - LICENSE
89
90
  - README.md
90
91
  - Rakefile
92
+ - benchmark/async_query.rb
91
93
  - benchmark/converter_hugeint_ips.rb
92
94
  - benchmark/get_converter_module_ips.rb
93
95
  - benchmark/to_bigdecimal_ips.rb
@@ -138,6 +140,8 @@ files:
138
140
  - lib/duckdb/prepared_statement.rb
139
141
  - lib/duckdb/result.rb
140
142
  - lib/duckdb/version.rb
143
+ - sample/async_query.rb
144
+ - sample/async_query_stream.rb
141
145
  homepage: https://github.com/suketa/ruby-duckdb
142
146
  licenses:
143
147
  - MIT