duckdb 0.9.1.2 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitattributes +1 -0
- data/.github/workflows/test_on_macos.yml +6 -6
- data/.github/workflows/test_on_ubuntu.yml +6 -6
- data/.github/workflows/test_on_windows.yml +4 -4
- data/CHANGELOG.md +7 -0
- data/Dockerfile +1 -1
- data/Gemfile.lock +4 -4
- data/README.md +17 -1
- data/benchmark/async_query.rb +90 -0
- data/ext/duckdb/extconf.rb +46 -13
- data/ext/duckdb/pending_result.c +20 -4
- data/ext/duckdb/prepared_statement.c +1 -1
- data/ext/duckdb/result.c +109 -67
- data/ext/duckdb/ruby-duckdb.h +1 -0
- data/lib/duckdb/connection.rb +27 -5
- data/lib/duckdb/prepared_statement.rb +6 -0
- data/lib/duckdb/result.rb +8 -2
- data/lib/duckdb/version.rb +1 -1
- data/sample/async_query.rb +24 -0
- data/sample/async_query_stream.rb +24 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afbb8627bfcb9764d6928d64558b55c18b26932b1ad50946cfe777169b6986f9
|
4
|
+
data.tar.gz: b560803df1d4b93d1e1b9f5c56b55f779c34095780592b9cdae51f7d4da2c0ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c16acf784261f874501381aa9c764bc461d56fd35b066bae88c6cf0b4de654a867a6d309641cf0cf39d60854d268ce610fd3997534adcd11f322e7e4cd76401
|
7
|
+
data.tar.gz: 24d26d68b843c27ca306980e0d1acdfed2c6f24bb573cb89ba684879c4d05d43256d2335085e82fb1c352b7cdffcd016f2c2dadf3d924fc747cc4c450b142693
|
data/.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.rb diff=ruby
|
@@ -1,9 +1,9 @@
|
|
1
1
|
name: MacOS
|
2
2
|
|
3
3
|
on:
|
4
|
-
push:
|
5
|
-
|
6
|
-
|
4
|
+
# push:
|
5
|
+
# branches:
|
6
|
+
# - main
|
7
7
|
pull_request:
|
8
8
|
types:
|
9
9
|
- opened
|
@@ -15,8 +15,8 @@ jobs:
|
|
15
15
|
runs-on: macos-latest
|
16
16
|
strategy:
|
17
17
|
matrix:
|
18
|
-
ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-
|
19
|
-
duckdb: ['0.9.
|
18
|
+
ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview3', 'head']
|
19
|
+
duckdb: ['0.9.2', '0.8.1']
|
20
20
|
|
21
21
|
steps:
|
22
22
|
- uses: actions/checkout@v3
|
@@ -59,7 +59,7 @@ jobs:
|
|
59
59
|
|
60
60
|
- name: run test with Ruby ${{ matrix.ruby }}
|
61
61
|
run: |
|
62
|
-
rake test
|
62
|
+
env RUBYOPT=-W:deprecated rake test
|
63
63
|
|
64
64
|
post-test:
|
65
65
|
name: All tests passed on macos
|
@@ -1,9 +1,9 @@
|
|
1
1
|
name: Ubuntu
|
2
2
|
|
3
3
|
on:
|
4
|
-
push:
|
5
|
-
|
6
|
-
|
4
|
+
# push:
|
5
|
+
# branches:
|
6
|
+
# - main
|
7
7
|
pull_request:
|
8
8
|
types:
|
9
9
|
- opened
|
@@ -15,8 +15,8 @@ jobs:
|
|
15
15
|
runs-on: ubuntu-latest
|
16
16
|
strategy:
|
17
17
|
matrix:
|
18
|
-
ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-
|
19
|
-
duckdb: ['0.9.
|
18
|
+
ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', '3.3.0-preview3', 'head']
|
19
|
+
duckdb: ['0.9.2', '0.8.1']
|
20
20
|
|
21
21
|
steps:
|
22
22
|
- uses: actions/checkout@v3
|
@@ -57,7 +57,7 @@ jobs:
|
|
57
57
|
env:
|
58
58
|
DUCKDB_VERSION: ${{ matrix.duckdb }}
|
59
59
|
run: |
|
60
|
-
rake test
|
60
|
+
env RUBYOPT=-W:deprecated rake test
|
61
61
|
|
62
62
|
post-test:
|
63
63
|
name: All tests passed on Ubuntu
|
@@ -1,9 +1,9 @@
|
|
1
1
|
name: Windows
|
2
2
|
|
3
3
|
on:
|
4
|
-
push:
|
5
|
-
|
6
|
-
|
4
|
+
# push:
|
5
|
+
# branches:
|
6
|
+
# - main
|
7
7
|
pull_request:
|
8
8
|
types:
|
9
9
|
- opened
|
@@ -16,7 +16,7 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
matrix:
|
18
18
|
ruby: ['2.7.8', '3.0.6', '3.1.4', '3.2.2', 'ucrt', 'mingw', 'mswin', 'head']
|
19
|
-
duckdb: ['0.9.
|
19
|
+
duckdb: ['0.9.2', '0.8.1']
|
20
20
|
|
21
21
|
steps:
|
22
22
|
- uses: actions/checkout@v3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
# ChangeLog
|
2
2
|
|
3
|
+
# 0.9.2
|
4
|
+
- add DuckDB::Connection#async_query_stream
|
5
|
+
- DuckDB::PendingResult.new accepts an optional second argument. If the second argument is
|
6
|
+
true, PendingResult#execute_pending returns a streaming DuckDB::Result object.
|
7
|
+
- add DuckDB::PreparedStatement#pending_prepared_stream
|
8
|
+
- add DuckDB::Result#streaming?.
|
9
|
+
|
3
10
|
# 0.9.1.2
|
4
11
|
- add DuckDB::Connection#interrupt, DuckDB::Connection#query_progress
|
5
12
|
- add DuckDB::Connection#async_query, alias method async_execute.
|
data/Dockerfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
duckdb (0.9.
|
4
|
+
duckdb (0.9.2)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -9,16 +9,16 @@ GEM
|
|
9
9
|
benchmark-ips (2.12.0)
|
10
10
|
mini_portile2 (2.8.5)
|
11
11
|
minitest (5.20.0)
|
12
|
-
nokogiri (1.15.
|
12
|
+
nokogiri (1.15.5)
|
13
13
|
mini_portile2 (~> 2.8.2)
|
14
14
|
racc (~> 1.4)
|
15
|
-
nokogiri (1.15.
|
15
|
+
nokogiri (1.15.5-x86_64-linux)
|
16
16
|
racc (~> 1.4)
|
17
17
|
racc (1.7.3)
|
18
18
|
rake (13.1.0)
|
19
19
|
rake-compiler (1.2.5)
|
20
20
|
rake
|
21
|
-
ruby_memcheck (2.2.
|
21
|
+
ruby_memcheck (2.2.1)
|
22
22
|
nokogiri
|
23
23
|
stackprof (0.2.25)
|
24
24
|
|
data/README.md
CHANGED
@@ -104,10 +104,25 @@ con.query('SELECT * FROM users WHERE name = ? AND email = ?', 'Alice', 'alice@ex
|
|
104
104
|
con.query('SELECT * FROM users WHERE name = $name AND email = $email', name: 'Alice', email: 'alice@example.com')
|
105
105
|
```
|
106
106
|
|
107
|
+
### using async query
|
108
|
+
|
109
|
+
You can use async query.
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
DuckDB::Result.use_chunk_each = true # must be true.
|
113
|
+
...
|
114
|
+
|
115
|
+
pending_result = con.async_query_stream('SLOW QUERY')
|
116
|
+
pending_result.execute_task while pending_result.state == :not_ready
|
117
|
+
|
118
|
+
result = pending_result.execute_pending
|
119
|
+
result.each.first
|
120
|
+
```
|
121
|
+
|
122
|
+
Here is [the benchmark](./benchmark/async_query.rb).
|
107
123
|
|
108
124
|
### using BLOB column
|
109
125
|
|
110
|
-
BLOB is available with DuckDB v0.2.5 or later.
|
111
126
|
Use `DuckDB::Blob.new` or use String#force_encoding(Encoding::BINARY)
|
112
127
|
|
113
128
|
```ruby
|
@@ -119,6 +134,7 @@ DuckDB::Database.open do |db|
|
|
119
134
|
stmt = DuckDB::PreparedStatement.new(con, 'INSERT INTO blob_table VALUES ($1)')
|
120
135
|
|
121
136
|
stmt.bind(1, DuckDB::Blob.new("\0\1\2\3\4\5"))
|
137
|
+
# or
|
122
138
|
# stmt.bind(1, "\0\1\2\3\4\5".force_encoding(Encoding::BINARY))
|
123
139
|
stmt.execute
|
124
140
|
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require 'duckdb'
|
3
|
+
require 'benchmark/ips'
|
4
|
+
|
5
|
+
|
6
|
+
DuckDB::Result.use_chunk_each = true
|
7
|
+
DuckDB::Database.open do |db|
|
8
|
+
db.connect do |con|
|
9
|
+
con.query('SET threads=1')
|
10
|
+
con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(100000)')
|
11
|
+
con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(100000)')
|
12
|
+
query_sql = 'SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)'
|
13
|
+
print <<~END_OF_HEAD
|
14
|
+
|
15
|
+
Benchmark: Get first record ======================================
|
16
|
+
END_OF_HEAD
|
17
|
+
|
18
|
+
Benchmark.ips do |x|
|
19
|
+
x.report('async_query') do
|
20
|
+
pending_result = con.async_query(query_sql)
|
21
|
+
|
22
|
+
pending_result.execute_task while pending_result.state == :not_ready
|
23
|
+
result = pending_result.execute_pending
|
24
|
+
result.each.first
|
25
|
+
end
|
26
|
+
x.report('query') do
|
27
|
+
result = con.query(query_sql)
|
28
|
+
result.each.first
|
29
|
+
end
|
30
|
+
x.report('async_query_stream') do
|
31
|
+
pending_result = con.async_query_stream(query_sql)
|
32
|
+
|
33
|
+
pending_result.execute_task while pending_result.state == :not_ready
|
34
|
+
result = pending_result.execute_pending
|
35
|
+
result.each.first
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
print <<~END_OF_HEAD
|
40
|
+
|
41
|
+
|
42
|
+
Benchmark: Get all records ======================================
|
43
|
+
END_OF_HEAD
|
44
|
+
|
45
|
+
Benchmark.ips do |x|
|
46
|
+
x.report('async_query') do
|
47
|
+
pending_result = con.async_query(query_sql)
|
48
|
+
|
49
|
+
pending_result.execute_task while pending_result.state == :not_ready
|
50
|
+
result = pending_result.execute_pending
|
51
|
+
result.each.to_a
|
52
|
+
end
|
53
|
+
x.report('query') do
|
54
|
+
result = con.query(query_sql)
|
55
|
+
result.each.to_a
|
56
|
+
end
|
57
|
+
x.report('async_query_stream') do
|
58
|
+
pending_result = con.async_query_stream(query_sql)
|
59
|
+
|
60
|
+
pending_result.execute_task while pending_result.state == :not_ready
|
61
|
+
result = pending_result.execute_pending
|
62
|
+
result.each.to_a
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
__END__
|
69
|
+
|
70
|
+
results:
|
71
|
+
Benchmark: Get first record ======================================
|
72
|
+
Warming up --------------------------------------
|
73
|
+
async_query 70.000 i/100ms
|
74
|
+
query 88.000 i/100ms
|
75
|
+
async_query_stream 188.000 i/100ms
|
76
|
+
Calculating -------------------------------------
|
77
|
+
async_query 847.191 (± 4.6%) i/s - 4.270k in 5.051650s
|
78
|
+
query 850.509 (± 3.8%) i/s - 4.312k in 5.078167s
|
79
|
+
async_query_stream 1.757k (± 7.3%) i/s - 8.836k in 5.057142s
|
80
|
+
|
81
|
+
|
82
|
+
Benchmark: Get all records ======================================
|
83
|
+
Warming up --------------------------------------
|
84
|
+
async_query 40.000 i/100ms
|
85
|
+
query 40.000 i/100ms
|
86
|
+
async_query_stream 39.000 i/100ms
|
87
|
+
Calculating -------------------------------------
|
88
|
+
async_query 402.567 (± 0.5%) i/s - 2.040k in 5.067639s
|
89
|
+
query 406.632 (± 0.7%) i/s - 2.040k in 5.017079s
|
90
|
+
async_query_stream 395.532 (± 0.8%) i/s - 1.989k in 5.028955s
|
data/ext/duckdb/extconf.rb
CHANGED
@@ -1,31 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'mkmf'
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
5
|
+
DUCKDB_REQUIRED_VERSION = '0.8.0'
|
6
|
+
|
7
|
+
def check_duckdb_header(header, version)
|
8
|
+
found = find_header(
|
9
|
+
header,
|
10
|
+
'/opt/homebrew/include',
|
11
|
+
'/opt/homebrew/opt/duckdb/include',
|
12
|
+
'/opt/local/include'
|
13
|
+
)
|
14
|
+
return if found
|
15
|
+
|
16
|
+
msg = "#{header} is not found. Install #{header} of duckdb >= #{version}."
|
17
|
+
print_message(msg)
|
18
|
+
raise msg
|
7
19
|
end
|
8
20
|
|
9
|
-
def check_duckdb_library(func, version)
|
10
|
-
|
21
|
+
def check_duckdb_library(library, func, version)
|
22
|
+
found = find_library(
|
23
|
+
library,
|
24
|
+
func,
|
25
|
+
'/opt/homebrew/lib',
|
26
|
+
'/opt/homebrew/opt/duckdb/lib',
|
27
|
+
'/opt/local/lib'
|
28
|
+
)
|
29
|
+
return if found
|
11
30
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
puts msg
|
16
|
-
puts '*' * 80
|
17
|
-
puts ''
|
31
|
+
library_name = duckdb_library_name(library)
|
32
|
+
msg = "#{library_name} is not found. Install #{library_name} of duckdb >= #{version}."
|
33
|
+
print_message(msg)
|
18
34
|
raise msg
|
19
35
|
end
|
20
36
|
|
37
|
+
def duckdb_library_name(library)
|
38
|
+
"lib#{library}.(so|dylib|dll)"
|
39
|
+
end
|
40
|
+
|
41
|
+
def print_message(msg)
|
42
|
+
print <<~END_OF_MESSAGE
|
43
|
+
|
44
|
+
#{'*' * 80}
|
45
|
+
#{msg}
|
46
|
+
#{'*' * 80}
|
47
|
+
|
48
|
+
END_OF_MESSAGE
|
49
|
+
end
|
50
|
+
|
21
51
|
dir_config('duckdb')
|
22
52
|
|
53
|
+
check_duckdb_header('duckdb.h', DUCKDB_REQUIRED_VERSION)
|
54
|
+
|
23
55
|
# check duckdb >= 0.8.0
|
24
|
-
check_duckdb_library('
|
56
|
+
check_duckdb_library('duckdb', 'duckdb_string_is_inlined', DUCKDB_REQUIRED_VERSION)
|
25
57
|
|
26
58
|
# check duckdb >= 0.9.0
|
27
59
|
have_func('duckdb_bind_parameter_index', 'duckdb.h')
|
28
60
|
|
61
|
+
# duckdb_parameter_name is not found on Windows.
|
29
62
|
have_func('duckdb_parameter_name', 'duckdb.h')
|
30
63
|
|
31
64
|
create_makefile('duckdb/duckdb_native')
|
data/ext/duckdb/pending_result.c
CHANGED
@@ -5,7 +5,7 @@ static VALUE cDuckDBPendingResult;
|
|
5
5
|
static void deallocate(void *ctx);
|
6
6
|
static VALUE allocate(VALUE klass);
|
7
7
|
static size_t memsize(const void *p);
|
8
|
-
static VALUE duckdb_pending_result_initialize(VALUE
|
8
|
+
static VALUE duckdb_pending_result_initialize(int argc, VALUE *args, VALUE self);
|
9
9
|
static VALUE duckdb_pending_result_execute_task(VALUE self);
|
10
10
|
static VALUE duckdb_pending_result_execute_pending(VALUE self);
|
11
11
|
|
@@ -38,11 +38,27 @@ static size_t memsize(const void *p) {
|
|
38
38
|
return sizeof(rubyDuckDBPendingResult);
|
39
39
|
}
|
40
40
|
|
41
|
-
static VALUE duckdb_pending_result_initialize(VALUE
|
41
|
+
static VALUE duckdb_pending_result_initialize(int argc, VALUE *argv, VALUE self) {
|
42
|
+
VALUE oDuckDBPreparedStatement;
|
43
|
+
VALUE streaming_p = Qfalse;
|
44
|
+
duckdb_state state;
|
45
|
+
|
46
|
+
rb_scan_args(argc, argv, "11", &oDuckDBPreparedStatement, &streaming_p);
|
47
|
+
|
48
|
+
if (rb_obj_is_kind_of(oDuckDBPreparedStatement, cDuckDBPreparedStatement) != Qtrue) {
|
49
|
+
rb_raise(rb_eTypeError, "1st argument must be DuckDB::PreparedStatement");
|
50
|
+
}
|
51
|
+
|
42
52
|
rubyDuckDBPendingResult *ctx = get_struct_pending_result(self);
|
43
53
|
rubyDuckDBPreparedStatement *stmt = get_struct_prepared_statement(oDuckDBPreparedStatement);
|
44
54
|
|
45
|
-
if (
|
55
|
+
if (!NIL_P(streaming_p) && streaming_p == Qtrue) {
|
56
|
+
state = duckdb_pending_prepared_streaming(stmt->prepared_statement, &(ctx->pending_result));
|
57
|
+
} else {
|
58
|
+
state = duckdb_pending_prepared(stmt->prepared_statement, &(ctx->pending_result));
|
59
|
+
}
|
60
|
+
|
61
|
+
if (state == DuckDBError) {
|
46
62
|
rb_raise(eDuckDBError, "%s", duckdb_pending_error(ctx->pending_result));
|
47
63
|
}
|
48
64
|
return self;
|
@@ -112,7 +128,7 @@ void rbduckdb_init_duckdb_pending_result(void) {
|
|
112
128
|
cDuckDBPendingResult = rb_define_class_under(mDuckDB, "PendingResult", rb_cObject);
|
113
129
|
rb_define_alloc_func(cDuckDBPendingResult, allocate);
|
114
130
|
|
115
|
-
rb_define_method(cDuckDBPendingResult, "initialize", duckdb_pending_result_initialize, 1);
|
131
|
+
rb_define_method(cDuckDBPendingResult, "initialize", duckdb_pending_result_initialize, -1);
|
116
132
|
rb_define_method(cDuckDBPendingResult, "execute_task", duckdb_pending_result_execute_task, 0);
|
117
133
|
rb_define_method(cDuckDBPendingResult, "execute_pending", duckdb_pending_result_execute_pending, 0);
|
118
134
|
|
data/ext/duckdb/result.c
CHANGED
@@ -19,6 +19,11 @@ static VALUE duckdb_result_column_count(VALUE oDuckDBResult);
|
|
19
19
|
static VALUE duckdb_result_row_count(VALUE oDuckDBResult);
|
20
20
|
static VALUE duckdb_result_rows_changed(VALUE oDuckDBResult);
|
21
21
|
static VALUE duckdb_result_columns(VALUE oDuckDBResult);
|
22
|
+
static VALUE duckdb_result_streaming_p(VALUE oDuckDBResult);
|
23
|
+
static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult);
|
24
|
+
|
25
|
+
static VALUE duckdb_result__chunk_stream(VALUE oDuckDBResult);
|
26
|
+
static void yield_rows(duckdb_data_chunk chunk, idx_t col_count);
|
22
27
|
static VALUE duckdb_result__column_type(VALUE oDuckDBResult, VALUE col_idx);
|
23
28
|
static VALUE duckdb_result__is_null(VALUE oDuckDBResult, VALUE row_idx, VALUE col_idx);
|
24
29
|
static VALUE duckdb_result__to_boolean(VALUE oDuckDBResult, VALUE row_idx, VALUE col_idx);
|
@@ -50,7 +55,6 @@ static VALUE vector_map(duckdb_logical_type ty, duckdb_vector vector, idx_t row_
|
|
50
55
|
static VALUE vector_struct(duckdb_logical_type ty, duckdb_vector vector, idx_t row_idx);
|
51
56
|
static VALUE vector_uuid(void* vector_data, idx_t row_idx);
|
52
57
|
static VALUE vector_value(duckdb_vector vector, idx_t row_idx);
|
53
|
-
static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult);
|
54
58
|
|
55
59
|
static const rb_data_type_t result_data_type = {
|
56
60
|
"DuckDB/Result",
|
@@ -235,6 +239,79 @@ static VALUE duckdb_result_columns(VALUE oDuckDBResult) {
|
|
235
239
|
return ary;
|
236
240
|
}
|
237
241
|
|
242
|
+
/*
|
243
|
+
* call-seq:
|
244
|
+
* result.streaming? -> Boolean
|
245
|
+
*
|
246
|
+
* Returns true if the result is streaming, otherwise false.
|
247
|
+
*
|
248
|
+
*/
|
249
|
+
static VALUE duckdb_result_streaming_p(VALUE oDuckDBResult) {
|
250
|
+
rubyDuckDBResult *ctx;
|
251
|
+
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
252
|
+
return duckdb_result_is_streaming(ctx->result) ? Qtrue : Qfalse;
|
253
|
+
}
|
254
|
+
|
255
|
+
static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult) {
|
256
|
+
rubyDuckDBResult *ctx;
|
257
|
+
idx_t col_count;
|
258
|
+
idx_t chunk_count;
|
259
|
+
idx_t chunk_idx;
|
260
|
+
duckdb_data_chunk chunk;
|
261
|
+
|
262
|
+
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
263
|
+
|
264
|
+
col_count = duckdb_column_count(&(ctx->result));
|
265
|
+
chunk_count = duckdb_result_chunk_count(ctx->result);
|
266
|
+
|
267
|
+
RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
|
268
|
+
|
269
|
+
for (chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
|
270
|
+
chunk = duckdb_result_get_chunk(ctx->result, chunk_idx);
|
271
|
+
yield_rows(chunk, col_count);
|
272
|
+
duckdb_destroy_data_chunk(&chunk);
|
273
|
+
}
|
274
|
+
return Qnil;
|
275
|
+
}
|
276
|
+
|
277
|
+
static VALUE duckdb_result__chunk_stream(VALUE oDuckDBResult) {
|
278
|
+
rubyDuckDBResult *ctx;
|
279
|
+
duckdb_data_chunk chunk;
|
280
|
+
idx_t col_count;
|
281
|
+
|
282
|
+
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
283
|
+
|
284
|
+
RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
|
285
|
+
|
286
|
+
col_count = duckdb_column_count(&(ctx->result));
|
287
|
+
|
288
|
+
while((chunk = duckdb_stream_fetch_chunk(ctx->result)) != NULL) {
|
289
|
+
yield_rows(chunk, col_count);
|
290
|
+
duckdb_destroy_data_chunk(&chunk);
|
291
|
+
}
|
292
|
+
return Qnil;
|
293
|
+
}
|
294
|
+
|
295
|
+
static void yield_rows(duckdb_data_chunk chunk, idx_t col_count) {
|
296
|
+
idx_t row_count;
|
297
|
+
idx_t row_idx;
|
298
|
+
idx_t col_idx;
|
299
|
+
duckdb_vector vector;
|
300
|
+
VALUE row;
|
301
|
+
VALUE val;
|
302
|
+
|
303
|
+
row_count = duckdb_data_chunk_get_size(chunk);
|
304
|
+
for (row_idx = 0; row_idx < row_count; row_idx++) {
|
305
|
+
row = rb_ary_new2(col_count);
|
306
|
+
for (col_idx = 0; col_idx < col_count; col_idx++) {
|
307
|
+
vector = duckdb_data_chunk_get_vector(chunk, col_idx);
|
308
|
+
val = vector_value(vector, row_idx);
|
309
|
+
rb_ary_store(row, col_idx, val);
|
310
|
+
}
|
311
|
+
rb_yield(row);
|
312
|
+
}
|
313
|
+
}
|
314
|
+
|
238
315
|
static VALUE duckdb_result__column_type(VALUE oDuckDBResult, VALUE col_idx) {
|
239
316
|
rubyDuckDBResult *ctx;
|
240
317
|
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
@@ -405,32 +482,32 @@ static VALUE vector_date(void *vector_data, idx_t row_idx) {
|
|
405
482
|
duckdb_date_struct date = duckdb_from_date(((duckdb_date *) vector_data)[row_idx]);
|
406
483
|
|
407
484
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_date"), 3,
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
485
|
+
INT2FIX(date.year),
|
486
|
+
INT2FIX(date.month),
|
487
|
+
INT2FIX(date.day)
|
488
|
+
);
|
412
489
|
}
|
413
490
|
|
414
491
|
static VALUE vector_timestamp(void* vector_data, idx_t row_idx) {
|
415
492
|
duckdb_timestamp_struct data = duckdb_from_timestamp(((duckdb_timestamp *)vector_data)[row_idx]);
|
416
493
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_time"), 7,
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
494
|
+
INT2FIX(data.date.year),
|
495
|
+
INT2FIX(data.date.month),
|
496
|
+
INT2FIX(data.date.day),
|
497
|
+
INT2FIX(data.time.hour),
|
498
|
+
INT2FIX(data.time.min),
|
499
|
+
INT2FIX(data.time.sec),
|
500
|
+
INT2NUM(data.time.micros)
|
501
|
+
);
|
425
502
|
}
|
426
503
|
|
427
504
|
static VALUE vector_interval(void* vector_data, idx_t row_idx) {
|
428
505
|
duckdb_interval data = ((duckdb_interval *)vector_data)[row_idx];
|
429
506
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_interval_from_vector"), 3,
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
507
|
+
INT2NUM(data.months),
|
508
|
+
INT2NUM(data.days),
|
509
|
+
LL2NUM(data.micros)
|
510
|
+
);
|
434
511
|
}
|
435
512
|
|
436
513
|
static VALUE vector_blob(void* vector_data, idx_t row_idx) {
|
@@ -454,9 +531,9 @@ static VALUE vector_varchar(void* vector_data, idx_t row_idx) {
|
|
454
531
|
static VALUE vector_hugeint(void* vector_data, idx_t row_idx) {
|
455
532
|
duckdb_hugeint hugeint = ((duckdb_hugeint *)vector_data)[row_idx];
|
456
533
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_hugeint_from_vector"), 2,
|
457
|
-
|
458
|
-
|
459
|
-
|
534
|
+
ULL2NUM(hugeint.lower),
|
535
|
+
LL2NUM(hugeint.upper)
|
536
|
+
);
|
460
537
|
}
|
461
538
|
|
462
539
|
static VALUE vector_decimal(duckdb_logical_type ty, void* vector_data, idx_t row_idx) {
|
@@ -477,11 +554,11 @@ static VALUE vector_decimal(duckdb_logical_type ty, void* vector_data, idx_t row
|
|
477
554
|
}
|
478
555
|
|
479
556
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_decimal_from_vector"), 4,
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
557
|
+
INT2FIX(width),
|
558
|
+
INT2FIX(scale),
|
559
|
+
ULL2NUM(value.lower),
|
560
|
+
LL2NUM(value.upper)
|
561
|
+
);
|
485
562
|
}
|
486
563
|
|
487
564
|
static VALUE vector_enum(duckdb_logical_type ty, void* vector_data, idx_t row_idx) {
|
@@ -577,9 +654,9 @@ static VALUE vector_struct(duckdb_logical_type ty, duckdb_vector vector, idx_t r
|
|
577
654
|
static VALUE vector_uuid(void* vector_data, idx_t row_idx) {
|
578
655
|
duckdb_hugeint hugeint = ((duckdb_hugeint *)vector_data)[row_idx];
|
579
656
|
return rb_funcall(mDuckDBConverter, rb_intern("_to_uuid_from_vector"), 2,
|
580
|
-
|
581
|
-
|
582
|
-
|
657
|
+
ULL2NUM(hugeint.lower),
|
658
|
+
LL2NUM(hugeint.upper)
|
659
|
+
);
|
583
660
|
}
|
584
661
|
|
585
662
|
static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
|
@@ -617,7 +694,7 @@ static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
|
|
617
694
|
case DUCKDB_TYPE_BIGINT:
|
618
695
|
obj = LL2NUM(((int64_t *) vector_data)[row_idx]);
|
619
696
|
break;
|
620
|
-
|
697
|
+
case DUCKDB_TYPE_UTINYINT:
|
621
698
|
obj = INT2FIX(((uint8_t *) vector_data)[row_idx]);
|
622
699
|
break;
|
623
700
|
case DUCKDB_TYPE_USMALLINT:
|
@@ -680,43 +757,6 @@ static VALUE vector_value(duckdb_vector vector, idx_t row_idx) {
|
|
680
757
|
return obj;
|
681
758
|
}
|
682
759
|
|
683
|
-
static VALUE duckdb_result_chunk_each(VALUE oDuckDBResult) {
|
684
|
-
rubyDuckDBResult *ctx;
|
685
|
-
VALUE row;
|
686
|
-
idx_t col_count;
|
687
|
-
idx_t row_count;
|
688
|
-
idx_t chunk_count;
|
689
|
-
idx_t col_idx;
|
690
|
-
idx_t row_idx;
|
691
|
-
idx_t chunk_idx;
|
692
|
-
duckdb_data_chunk chunk;
|
693
|
-
duckdb_vector vector;
|
694
|
-
VALUE val;
|
695
|
-
|
696
|
-
TypedData_Get_Struct(oDuckDBResult, rubyDuckDBResult, &result_data_type, ctx);
|
697
|
-
|
698
|
-
col_count = duckdb_column_count(&(ctx->result));
|
699
|
-
chunk_count = duckdb_result_chunk_count(ctx->result);
|
700
|
-
|
701
|
-
RETURN_ENUMERATOR(oDuckDBResult, 0, 0);
|
702
|
-
|
703
|
-
for (chunk_idx = 0; chunk_idx < chunk_count; chunk_idx++) {
|
704
|
-
chunk = duckdb_result_get_chunk(ctx->result, chunk_idx);
|
705
|
-
row_count = duckdb_data_chunk_get_size(chunk);
|
706
|
-
for (row_idx = 0; row_idx < row_count; row_idx++) {
|
707
|
-
row = rb_ary_new2(col_count);
|
708
|
-
for (col_idx = 0; col_idx < col_count; col_idx++) {
|
709
|
-
vector = duckdb_data_chunk_get_vector(chunk, col_idx);
|
710
|
-
val = vector_value(vector, row_idx);
|
711
|
-
rb_ary_store(row, col_idx, val);
|
712
|
-
}
|
713
|
-
rb_yield(row);
|
714
|
-
}
|
715
|
-
duckdb_destroy_data_chunk(&chunk);
|
716
|
-
}
|
717
|
-
return Qnil;
|
718
|
-
}
|
719
|
-
|
720
760
|
void rbduckdb_init_duckdb_result(void) {
|
721
761
|
cDuckDBResult = rb_define_class_under(mDuckDB, "Result", rb_cObject);
|
722
762
|
rb_define_alloc_func(cDuckDBResult, allocate);
|
@@ -725,6 +765,9 @@ void rbduckdb_init_duckdb_result(void) {
|
|
725
765
|
rb_define_method(cDuckDBResult, "row_count", duckdb_result_row_count, 0);
|
726
766
|
rb_define_method(cDuckDBResult, "rows_changed", duckdb_result_rows_changed, 0);
|
727
767
|
rb_define_method(cDuckDBResult, "columns", duckdb_result_columns, 0);
|
768
|
+
rb_define_method(cDuckDBResult, "streaming?", duckdb_result_streaming_p, 0);
|
769
|
+
rb_define_method(cDuckDBResult, "chunk_each", duckdb_result_chunk_each, 0);
|
770
|
+
rb_define_private_method(cDuckDBResult, "_chunk_stream", duckdb_result__chunk_stream, 0);
|
728
771
|
rb_define_private_method(cDuckDBResult, "_column_type", duckdb_result__column_type, 1);
|
729
772
|
rb_define_private_method(cDuckDBResult, "_null?", duckdb_result__is_null, 2);
|
730
773
|
rb_define_private_method(cDuckDBResult, "_to_boolean", duckdb_result__to_boolean, 2);
|
@@ -742,5 +785,4 @@ void rbduckdb_init_duckdb_result(void) {
|
|
742
785
|
rb_define_private_method(cDuckDBResult, "_enum_internal_type", duckdb_result__enum_internal_type, 1);
|
743
786
|
rb_define_private_method(cDuckDBResult, "_enum_dictionary_size", duckdb_result__enum_dictionary_size, 1);
|
744
787
|
rb_define_private_method(cDuckDBResult, "_enum_dictionary_value", duckdb_result__enum_dictionary_value, 2);
|
745
|
-
rb_define_method(cDuckDBResult, "chunk_each", duckdb_result_chunk_each, 0);
|
746
788
|
}
|
data/ext/duckdb/ruby-duckdb.h
CHANGED
data/lib/duckdb/connection.rb
CHANGED
@@ -42,14 +42,12 @@ module DuckDB
|
|
42
42
|
# require 'duckdb'
|
43
43
|
# db = DuckDB::Database.open('duckdb_file')
|
44
44
|
# con = db.connect
|
45
|
-
# pending_result = con.async_query('SELECT * FROM users')
|
46
|
-
# sql = 'SELECT * FROM users WHERE name = ? AND email = ?'
|
47
|
-
# pending_result = con.async_query(sql, 'Dave', 'dave@example.com')
|
48
|
-
#
|
49
|
-
# # or You can use named parameter.
|
50
45
|
#
|
51
46
|
# sql = 'SELECT * FROM users WHERE name = $name AND email = $email'
|
52
47
|
# pending_result = con.async_query(sql, name: 'Dave', email: 'dave@example.com')
|
48
|
+
# pending_result.execute_task while pending_result.state == :not_ready
|
49
|
+
# result = pending_result.execute_pending
|
50
|
+
# result.each.first
|
53
51
|
#
|
54
52
|
def async_query(sql, *args, **kwargs)
|
55
53
|
stmt = PreparedStatement.new(self, sql)
|
@@ -57,6 +55,30 @@ module DuckDB
|
|
57
55
|
stmt.pending_prepared
|
58
56
|
end
|
59
57
|
|
58
|
+
#
|
59
|
+
# executes sql with args asynchronously and provides a streaming result.
|
60
|
+
# The first argument sql must be an SQL string.
|
61
|
+
# The remaining arguments are parameters of the SQL string.
|
62
|
+
# This method returns a DuckDB::PendingResult object.
|
63
|
+
#
|
64
|
+
# require 'duckdb'
|
65
|
+
# DuckDB::Result.use_chunk_each = true # must be true
|
66
|
+
# db = DuckDB::Database.open('duckdb_file')
|
67
|
+
# con = db.connect
|
68
|
+
#
|
69
|
+
# sql = 'SELECT * FROM users WHERE name = $name AND email = $email'
|
70
|
+
# pending_result = con.async_query_stream(sql, name: 'Dave', email: 'dave@example.com')
|
71
|
+
#
|
72
|
+
# pending_result.execute_task while pending_result.state == :not_ready
|
73
|
+
# result = pending_result.execute_pending
|
74
|
+
# result.each.first
|
75
|
+
#
|
76
|
+
def async_query_stream(sql, *args, **kwargs)
|
77
|
+
stmt = PreparedStatement.new(self, sql)
|
78
|
+
stmt.bind_args(*args, **kwargs)
|
79
|
+
stmt.pending_prepared_stream
|
80
|
+
end
|
81
|
+
|
60
82
|
#
|
61
83
|
# connects DuckDB database
|
62
84
|
# The first argument is DuckDB::Database object
|
@@ -24,6 +24,12 @@ module DuckDB
|
|
24
24
|
PendingResult.new(self)
|
25
25
|
end
|
26
26
|
|
27
|
+
def pending_prepared_stream
|
28
|
+
raise DuckDB::Error, 'DuckDB::Result.use_chunk_each must be true.' unless DuckDB::Result.use_chunk_each?
|
29
|
+
|
30
|
+
PendingResult.new(self, true)
|
31
|
+
end
|
32
|
+
|
27
33
|
# binds all parameters with SQL prepared statement.
|
28
34
|
#
|
29
35
|
# require 'duckdb'
|
data/lib/duckdb/result.rb
CHANGED
@@ -58,9 +58,15 @@ module DuckDB
|
|
58
58
|
|
59
59
|
def each
|
60
60
|
if self.class.use_chunk_each?
|
61
|
-
|
61
|
+
if streaming?
|
62
|
+
return _chunk_stream unless block_given?
|
62
63
|
|
63
|
-
|
64
|
+
_chunk_stream { |row| yield row }
|
65
|
+
else
|
66
|
+
return chunk_each unless block_given?
|
67
|
+
|
68
|
+
chunk_each { |row| yield row }
|
69
|
+
end
|
64
70
|
else
|
65
71
|
warn('this `each` behavior will be deprecated in the future. set `DuckDB::Result.use_chunk_each = true` to use new `each` behavior.')
|
66
72
|
return to_enum { row_size } unless block_given?
|
data/lib/duckdb/version.rb
CHANGED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'duckdb'
|
2
|
+
|
3
|
+
DuckDB::Result.use_chunk_each = true
|
4
|
+
DuckDB::Database.open do |db|
|
5
|
+
db.connect do |con|
|
6
|
+
con.query('SET threads=1')
|
7
|
+
con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(10000)')
|
8
|
+
con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(10000)')
|
9
|
+
# con.query('SET ENABLE_PROGRESS_BAR=true')
|
10
|
+
# con.query('SET ENABLE_PROGRESS_BAR_PRINT=false')
|
11
|
+
pending_result = con.async_query('SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)')
|
12
|
+
|
13
|
+
# con.interrupt
|
14
|
+
while pending_result.state == :not_ready
|
15
|
+
pending_result.execute_task
|
16
|
+
print '.'
|
17
|
+
$stdout.flush
|
18
|
+
sleep 0.01
|
19
|
+
end
|
20
|
+
result = pending_result.execute_pending
|
21
|
+
puts
|
22
|
+
p result.each.first
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'duckdb'
|
2
|
+
|
3
|
+
DuckDB::Result.use_chunk_each = true
|
4
|
+
DuckDB::Database.open do |db|
|
5
|
+
db.connect do |con|
|
6
|
+
con.query('SET threads=1')
|
7
|
+
con.query('CREATE TABLE tbl as SELECT range a, mod(range, 10) b FROM range(10000)')
|
8
|
+
con.query('CREATE TABLE tbl2 as SELECT range a, mod(range, 10) b FROM range(10000)')
|
9
|
+
# con.query('SET ENABLE_PROGRESS_BAR=true')
|
10
|
+
# con.query('SET ENABLE_PROGRESS_BAR_PRINT=false')
|
11
|
+
pending_result = con.async_query_stream('SELECT * FROM tbl where b = (SELECT min(b) FROM tbl2)')
|
12
|
+
|
13
|
+
# con.interrupt
|
14
|
+
while pending_result.state == :not_ready
|
15
|
+
pending_result.execute_task
|
16
|
+
print '.'
|
17
|
+
$stdout.flush
|
18
|
+
sleep 0.01
|
19
|
+
end
|
20
|
+
result = pending_result.execute_pending
|
21
|
+
puts
|
22
|
+
p result.each.first
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: duckdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masaki Suketa
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -75,6 +75,7 @@ extensions:
|
|
75
75
|
- ext/duckdb/extconf.rb
|
76
76
|
extra_rdoc_files: []
|
77
77
|
files:
|
78
|
+
- ".gitattributes"
|
78
79
|
- ".github/FUNDING.yml"
|
79
80
|
- ".github/workflows/test_on_macos.yml"
|
80
81
|
- ".github/workflows/test_on_ubuntu.yml"
|
@@ -88,6 +89,7 @@ files:
|
|
88
89
|
- LICENSE
|
89
90
|
- README.md
|
90
91
|
- Rakefile
|
92
|
+
- benchmark/async_query.rb
|
91
93
|
- benchmark/converter_hugeint_ips.rb
|
92
94
|
- benchmark/get_converter_module_ips.rb
|
93
95
|
- benchmark/to_bigdecimal_ips.rb
|
@@ -138,6 +140,8 @@ files:
|
|
138
140
|
- lib/duckdb/prepared_statement.rb
|
139
141
|
- lib/duckdb/result.rb
|
140
142
|
- lib/duckdb/version.rb
|
143
|
+
- sample/async_query.rb
|
144
|
+
- sample/async_query_stream.rb
|
141
145
|
homepage: https://github.com/suketa/ruby-duckdb
|
142
146
|
licenses:
|
143
147
|
- MIT
|