duckdb 0.9.2-dev2.0 → 0.9.2-dev7.0
Sign up to get free protection for your applications and to get access to all the features.
- package/lib/duckdb.d.ts +16 -0
- package/lib/duckdb.js +12 -0
- package/package.json +1 -1
- package/src/database.cpp +30 -1
- package/src/duckdb_node.cpp +29 -8
- package/src/duckdb_node.hpp +2 -0
- package/src/statement.cpp +9 -0
- package/test/test_all_types.test.ts +13 -12
- package/test/tokenize.test.ts +74 -0
package/lib/duckdb.d.ts
CHANGED
@@ -127,6 +127,20 @@ export type ReplacementScanCallback = (
|
|
127
127
|
table: string
|
128
128
|
) => ReplacementScanResult | null;
|
129
129
|
|
130
|
+
export enum TokenType {
|
131
|
+
IDENTIFIER = 0,
|
132
|
+
NUMERIC_CONSTANT = 1,
|
133
|
+
STRING_CONSTANT = 2,
|
134
|
+
OPERATOR = 3,
|
135
|
+
KEYWORD = 4,
|
136
|
+
COMMENT = 5,
|
137
|
+
}
|
138
|
+
|
139
|
+
export interface ScriptTokens {
|
140
|
+
offsets: number[];
|
141
|
+
types: TokenType[];
|
142
|
+
}
|
143
|
+
|
130
144
|
export class Database {
|
131
145
|
constructor(path: string, accessMode?: number | Record<string,string>, callback?: Callback<any>);
|
132
146
|
constructor(path: string, callback?: Callback<any>);
|
@@ -169,6 +183,8 @@ export class Database {
|
|
169
183
|
registerReplacementScan(
|
170
184
|
replacementScan: ReplacementScanCallback
|
171
185
|
): Promise<void>;
|
186
|
+
|
187
|
+
tokenize(text: string): ScriptTokens;
|
172
188
|
}
|
173
189
|
|
174
190
|
export type GenericTypeInfo = {
|
package/lib/duckdb.js
CHANGED
@@ -64,6 +64,10 @@ var Statement = duckdb.Statement;
|
|
64
64
|
* @class
|
65
65
|
*/
|
66
66
|
var QueryResult = duckdb.QueryResult;
|
67
|
+
/**
|
68
|
+
* Types of tokens returned by `tokenize`.
|
69
|
+
*/
|
70
|
+
var TokenType = duckdb.TokenType;
|
67
71
|
|
68
72
|
/**
|
69
73
|
* @method
|
@@ -631,6 +635,14 @@ Database.prototype.unregister_udf = function () {
|
|
631
635
|
|
632
636
|
Database.prototype.registerReplacementScan;
|
633
637
|
|
638
|
+
/**
|
639
|
+
* Return the positions and types of the tokens in the given text
|
640
|
+
* @method
|
641
|
+
* @arg text
|
642
|
+
* @return {ScriptTokens}
|
643
|
+
*/
|
644
|
+
Database.prototype.tokenize;
|
645
|
+
|
634
646
|
/**
|
635
647
|
* Not implemented
|
636
648
|
*/
|
package/package.json
CHANGED
package/src/database.cpp
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
2
2
|
#include "duckdb/parser/expression/function_expression.hpp"
|
3
|
+
#include "duckdb/parser/parser.hpp"
|
3
4
|
#include "duckdb/parser/tableref/table_function_ref.hpp"
|
4
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
5
6
|
#include "duckdb_node.hpp"
|
@@ -18,7 +19,8 @@ Napi::FunctionReference Database::Init(Napi::Env env, Napi::Object exports) {
|
|
18
19
|
{InstanceMethod("close_internal", &Database::Close), InstanceMethod("wait", &Database::Wait),
|
19
20
|
InstanceMethod("serialize", &Database::Serialize), InstanceMethod("parallelize", &Database::Parallelize),
|
20
21
|
InstanceMethod("connect", &Database::Connect), InstanceMethod("interrupt", &Database::Interrupt),
|
21
|
-
InstanceMethod("registerReplacementScan", &Database::RegisterReplacementScan)
|
22
|
+
InstanceMethod("registerReplacementScan", &Database::RegisterReplacementScan),
|
23
|
+
InstanceMethod("tokenize", &Database::Tokenize)});
|
22
24
|
|
23
25
|
exports.Set("Database", t);
|
24
26
|
|
@@ -364,4 +366,31 @@ Napi::Value Database::RegisterReplacementScan(const Napi::CallbackInfo &info) {
|
|
364
366
|
return deferred.Promise();
|
365
367
|
}
|
366
368
|
|
369
|
+
Napi::Value Database::Tokenize(const Napi::CallbackInfo &info) {
|
370
|
+
auto env = info.Env();
|
371
|
+
|
372
|
+
if (info.Length() < 1) {
|
373
|
+
throw Napi::TypeError::New(env, "Text argument expected");
|
374
|
+
}
|
375
|
+
|
376
|
+
std::string text = info[0].As<Napi::String>();
|
377
|
+
|
378
|
+
auto tokens = duckdb::Parser::Tokenize(text);
|
379
|
+
auto numTokens = tokens.size();
|
380
|
+
|
381
|
+
auto offsets = Napi::Array::New(env, numTokens);
|
382
|
+
auto types = Napi::Array::New(env, numTokens);
|
383
|
+
|
384
|
+
for (size_t i = 0; i < numTokens; i++) {
|
385
|
+
auto token = tokens[i];
|
386
|
+
offsets.Set(i, token.start);
|
387
|
+
types.Set(i, (uint8_t)token.type);
|
388
|
+
}
|
389
|
+
|
390
|
+
auto result = Napi::Object::New(env);
|
391
|
+
result.Set("offsets", offsets);
|
392
|
+
result.Set("types", types);
|
393
|
+
return result;
|
394
|
+
}
|
395
|
+
|
367
396
|
} // namespace node_duckdb
|
package/src/duckdb_node.cpp
CHANGED
@@ -12,15 +12,36 @@ NodeDuckDB::NodeDuckDB(Napi::Env env, Napi::Object exports) {
|
|
12
12
|
statement_constructor = node_duckdb::Statement::Init(env, exports);
|
13
13
|
query_result_constructor = node_duckdb::QueryResult::Init(env, exports);
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
auto token_type_enum = Napi::Object::New(env);
|
16
|
+
|
17
|
+
token_type_enum.Set("IDENTIFIER", 0);
|
18
|
+
token_type_enum.Set("NUMERIC_CONSTANT", 1);
|
19
|
+
token_type_enum.Set("STRING_CONSTANT", 2);
|
20
|
+
token_type_enum.Set("OPERATOR", 3);
|
21
|
+
token_type_enum.Set("KEYWORD", 4);
|
22
|
+
token_type_enum.Set("COMMENT", 5);
|
23
|
+
|
24
|
+
// TypeScript enums expose an inverse mapping.
|
25
|
+
token_type_enum.Set((uint32_t)0, "IDENTIFIER");
|
26
|
+
token_type_enum.Set((uint32_t)1, "NUMERIC_CONSTANT");
|
27
|
+
token_type_enum.Set((uint32_t)2, "STRING_CONSTANT");
|
28
|
+
token_type_enum.Set((uint32_t)3, "OPERATOR");
|
29
|
+
token_type_enum.Set((uint32_t)4, "KEYWORD");
|
30
|
+
token_type_enum.Set((uint32_t)5, "COMMENT");
|
31
|
+
|
32
|
+
token_type_enum_ref = Napi::ObjectReference::New(token_type_enum);
|
33
|
+
|
34
|
+
exports.DefineProperties(
|
35
|
+
{DEFINE_CONSTANT_INTEGER(exports, node_duckdb::Database::DUCKDB_NODEJS_ERROR, ERROR) DEFINE_CONSTANT_INTEGER(
|
17
36
|
exports, node_duckdb::Database::DUCKDB_NODEJS_READONLY, OPEN_READONLY) // same as SQLite
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
37
|
+
DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_READWRITE) // ignored
|
38
|
+
DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_CREATE) // ignored
|
39
|
+
DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_FULLMUTEX) // ignored
|
40
|
+
DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_SHAREDCACHE) // ignored
|
41
|
+
DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_PRIVATECACHE) // ignored
|
42
|
+
|
43
|
+
Napi::PropertyDescriptor::Value("TokenType", token_type_enum,
|
44
|
+
static_cast<napi_property_attributes>(napi_enumerable | napi_configurable))});
|
24
45
|
}
|
25
46
|
|
26
47
|
NODE_API_ADDON(NodeDuckDB);
|
package/src/duckdb_node.hpp
CHANGED
@@ -23,6 +23,7 @@ public:
|
|
23
23
|
Napi::FunctionReference connection_constructor;
|
24
24
|
Napi::FunctionReference statement_constructor;
|
25
25
|
Napi::FunctionReference query_result_constructor;
|
26
|
+
Napi::ObjectReference token_type_enum_ref;
|
26
27
|
};
|
27
28
|
|
28
29
|
namespace node_duckdb {
|
@@ -109,6 +110,7 @@ public:
|
|
109
110
|
Napi::Value Interrupt(const Napi::CallbackInfo &info);
|
110
111
|
Napi::Value Close(const Napi::CallbackInfo &info);
|
111
112
|
Napi::Value RegisterReplacementScan(const Napi::CallbackInfo &info);
|
113
|
+
Napi::Value Tokenize(const Napi::CallbackInfo &info);
|
112
114
|
|
113
115
|
public:
|
114
116
|
constexpr static int DUCKDB_NODEJS_ERROR = -1;
|
package/src/statement.cpp
CHANGED
@@ -187,6 +187,15 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
|
|
187
187
|
const auto scale = duckdb::Interval::SECS_PER_DAY * duckdb::Interval::MSECS_PER_SEC;
|
188
188
|
value = Napi::Date::New(env, double(dval.GetValue<int32_t>() * scale));
|
189
189
|
} break;
|
190
|
+
case duckdb::LogicalTypeId::TIMESTAMP_NS: {
|
191
|
+
value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / (duckdb::Interval::MICROS_PER_MSEC * 1000)));
|
192
|
+
} break;
|
193
|
+
case duckdb::LogicalTypeId::TIMESTAMP_MS: {
|
194
|
+
value = Napi::Date::New(env, double(dval.GetValue<int64_t>()));
|
195
|
+
} break;
|
196
|
+
case duckdb::LogicalTypeId::TIMESTAMP_SEC: {
|
197
|
+
value = Napi::Date::New(env, double(dval.GetValue<int64_t>() * duckdb::Interval::MSECS_PER_SEC));
|
198
|
+
} break;
|
190
199
|
case duckdb::LogicalTypeId::TIMESTAMP:
|
191
200
|
case duckdb::LogicalTypeId::TIMESTAMP_TZ: {
|
192
201
|
value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / duckdb::Interval::MICROS_PER_MSEC));
|
@@ -22,10 +22,12 @@ function timedelta(obj: { days: number; micros: number; months: number }) {
|
|
22
22
|
const replacement_values: Record<string, string> = {
|
23
23
|
timestamp:
|
24
24
|
"'1990-01-01 00:00:00'::TIMESTAMP, '9999-12-31 23:59:59'::TIMESTAMP, NULL::TIMESTAMP",
|
25
|
-
|
26
|
-
|
27
|
-
//
|
28
|
-
|
25
|
+
timestamp_s:
|
26
|
+
"'1990-01-01 00:00:00'::TIMESTAMP_S, '9999-12-31 23:59:59'::TIMESTAMP_S, NULL::TIMESTAMP_S",
|
27
|
+
// note: timestamp_ns does not support extreme values
|
28
|
+
timestamp_ns: "'1990-01-01 00:00:00'::TIMESTAMP_NS, NULL::TIMESTAMP_NS",
|
29
|
+
timestamp_ms:
|
30
|
+
"'1990-01-01 00:00:00'::TIMESTAMP_MS, '9999-12-31 23:59:59'::TIMESTAMP_MS, NULL::TIMESTAMP_MS",
|
29
31
|
timestamp_tz:
|
30
32
|
"'1990-01-01 00:00:00Z'::TIMESTAMPTZ, '9999-12-31 23:59:59.999999Z'::TIMESTAMPTZ, NULL::TIMESTAMPTZ",
|
31
33
|
date: "'1990-01-01'::DATE, '9999-12-31'::DATE, NULL::DATE",
|
@@ -157,7 +159,7 @@ const correct_answer_map: Record<string, any[]> = {
|
|
157
159
|
null,
|
158
160
|
],
|
159
161
|
map: ["{}", "{key1=🦆🦆🦆🦆🦆🦆, key2=goose}", null],
|
160
|
-
union: [
|
162
|
+
union: ["Frank", "5", null],
|
161
163
|
|
162
164
|
time_tz: ["00:00:00-1559", "23:59:59.999999+1559", null],
|
163
165
|
interval: [
|
@@ -176,16 +178,15 @@ const correct_answer_map: Record<string, any[]> = {
|
|
176
178
|
null,
|
177
179
|
],
|
178
180
|
date: [new Date("1990-01-01"), new Date("9999-12-31"), null],
|
179
|
-
timestamp_s: [
|
180
|
-
|
181
|
-
|
182
|
-
"1677-09-21 00:12:43.145225",
|
183
|
-
"2262-04-11 23:47:16.854775",
|
181
|
+
timestamp_s: [
|
182
|
+
new Date(Date.UTC(1990, 0, 1)),
|
183
|
+
new Date("9999-12-31T23:59:59.000Z"),
|
184
184
|
null,
|
185
185
|
],
|
186
|
+
timestamp_ns: [new Date(Date.UTC(1990, 0, 1)), null],
|
186
187
|
timestamp_ms: [
|
187
|
-
|
188
|
-
"
|
188
|
+
new Date(Date.UTC(1990, 0, 1)),
|
189
|
+
new Date("9999-12-31T23:59:59.000Z"),
|
189
190
|
null,
|
190
191
|
],
|
191
192
|
timestamp_tz: [
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import * as assert from 'assert';
|
2
|
+
import * as duckdb from '..';
|
3
|
+
|
4
|
+
describe('tokenize', function () {
|
5
|
+
it('should return correct tokens for a single statement', function () {
|
6
|
+
const db = new duckdb.Database(':memory:');
|
7
|
+
const output = db.tokenize('select 1;');
|
8
|
+
assert.deepStrictEqual(output, {
|
9
|
+
offsets: [0, 7, 8],
|
10
|
+
types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR]
|
11
|
+
});
|
12
|
+
});
|
13
|
+
it('should return correct tokens for a multiple statements', function () {
|
14
|
+
const db = new duckdb.Database(':memory:');
|
15
|
+
const output = db.tokenize('select 1; select 2;');
|
16
|
+
assert.deepStrictEqual(output, {
|
17
|
+
offsets: [0, 7, 8, 10, 17, 18],
|
18
|
+
types: [
|
19
|
+
duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR,
|
20
|
+
duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR
|
21
|
+
]
|
22
|
+
});
|
23
|
+
});
|
24
|
+
it('should return no tokens for an empty string', function () {
|
25
|
+
const db = new duckdb.Database(':memory:');
|
26
|
+
const output = db.tokenize('');
|
27
|
+
assert.deepStrictEqual(output, {
|
28
|
+
offsets: [],
|
29
|
+
types: []
|
30
|
+
});
|
31
|
+
});
|
32
|
+
it('should handle quoted semicolons in string constants', function () {
|
33
|
+
const db = new duckdb.Database(':memory:');
|
34
|
+
const output = db.tokenize(`select ';';`);
|
35
|
+
assert.deepStrictEqual(output, {
|
36
|
+
offsets: [0, 7, 10],
|
37
|
+
types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.STRING_CONSTANT, duckdb.TokenType.OPERATOR]
|
38
|
+
});
|
39
|
+
});
|
40
|
+
it('should handle quoted semicolons in identifiers', function () {
|
41
|
+
const db = new duckdb.Database(':memory:');
|
42
|
+
const output = db.tokenize(`from ";";`);
|
43
|
+
assert.deepStrictEqual(output, {
|
44
|
+
offsets: [0, 5, 8],
|
45
|
+
types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.IDENTIFIER, duckdb.TokenType.OPERATOR]
|
46
|
+
});
|
47
|
+
});
|
48
|
+
it('should handle comments', function () {
|
49
|
+
const db = new duckdb.Database(':memory:');
|
50
|
+
const output = db.tokenize(`select /* comment */ 1`);
|
51
|
+
// Note that the tokenizer doesn't return tokens for comments.
|
52
|
+
assert.deepStrictEqual(output, {
|
53
|
+
offsets: [0, 21],
|
54
|
+
types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT]
|
55
|
+
});
|
56
|
+
});
|
57
|
+
it('should handle invalid syntax', function () {
|
58
|
+
const db = new duckdb.Database(':memory:');
|
59
|
+
const output = db.tokenize(`selec 1`);
|
60
|
+
// The misspelled keyword is scanned as an identifier.
|
61
|
+
assert.deepStrictEqual(output, {
|
62
|
+
offsets: [0, 6],
|
63
|
+
types: [duckdb.TokenType.IDENTIFIER, duckdb.TokenType.NUMERIC_CONSTANT]
|
64
|
+
});
|
65
|
+
});
|
66
|
+
it('should support inverse TokenType mapping', function () {
|
67
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.IDENTIFIER], "IDENTIFIER");
|
68
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.NUMERIC_CONSTANT], "NUMERIC_CONSTANT");
|
69
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.STRING_CONSTANT], "STRING_CONSTANT");
|
70
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.OPERATOR], "OPERATOR");
|
71
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.KEYWORD], "KEYWORD");
|
72
|
+
assert.equal(duckdb.TokenType[duckdb.TokenType.COMMENT], "COMMENT");
|
73
|
+
});
|
74
|
+
});
|