duckdb 0.9.2-dev2.0 → 0.9.2-dev7.0

package/lib/duckdb.d.ts CHANGED
@@ -127,6 +127,20 @@ export type ReplacementScanCallback = (
   table: string
 ) => ReplacementScanResult | null;
 
+export enum TokenType {
+  IDENTIFIER = 0,
+  NUMERIC_CONSTANT = 1,
+  STRING_CONSTANT = 2,
+  OPERATOR = 3,
+  KEYWORD = 4,
+  COMMENT = 5,
+}
+
+export interface ScriptTokens {
+  offsets: number[];
+  types: TokenType[];
+}
+
 export class Database {
   constructor(path: string, accessMode?: number | Record<string,string>, callback?: Callback<any>);
   constructor(path: string, callback?: Callback<any>);
@@ -169,6 +183,8 @@ export class Database {
   registerReplacementScan(
     replacementScan: ReplacementScanCallback
   ): Promise<void>;
+
+  tokenize(text: string): ScriptTokens;
 }
 
 export type GenericTypeInfo = {
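Taken together, the new declarations give `Database` a synchronous `tokenize` method that runs DuckDB's parser over a piece of SQL and returns two parallel arrays: the start offset of each token and its `TokenType`. A minimal usage sketch (the surrounding setup is illustrative, not part of this diff):

import * as duckdb from 'duckdb';

const db = new duckdb.Database(':memory:');

// tokenize is synchronous; it only scans the text, it does not execute it.
const { offsets, types } = db.tokenize('select 42;');

// Token i starts at offsets[i] and has type types[i].
for (let i = 0; i < offsets.length; i++) {
  console.log(offsets[i], duckdb.TokenType[types[i]]);
}
// 0 KEYWORD
// 7 NUMERIC_CONSTANT
// 9 OPERATOR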
package/lib/duckdb.js CHANGED
@@ -64,6 +64,10 @@ var Statement = duckdb.Statement;
  * @class
  */
 var QueryResult = duckdb.QueryResult;
+/**
+ * Types of tokens returned by `tokenize`.
+ */
+var TokenType = duckdb.TokenType;
 
 /**
  * @method
@@ -631,6 +635,14 @@ Database.prototype.unregister_udf = function () {
 
 Database.prototype.registerReplacementScan;
 
+/**
+ * Return positions and types of tokens in the given text
+ * @method
+ * @arg text
+ * @return {ScriptTokens}
+ */
+Database.prototype.tokenize;
+
 /**
  * Not implemented
  */
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.9.2-dev2.0",
+  "version": "0.9.2-dev7.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
package/src/database.cpp CHANGED
@@ -1,5 +1,6 @@
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
+#include "duckdb/parser/parser.hpp"
 #include "duckdb/parser/tableref/table_function_ref.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
 #include "duckdb_node.hpp"
@@ -18,7 +19,8 @@ Napi::FunctionReference Database::Init(Napi::Env env, Napi::Object exports) {
         {InstanceMethod("close_internal", &Database::Close), InstanceMethod("wait", &Database::Wait),
          InstanceMethod("serialize", &Database::Serialize), InstanceMethod("parallelize", &Database::Parallelize),
          InstanceMethod("connect", &Database::Connect), InstanceMethod("interrupt", &Database::Interrupt),
-         InstanceMethod("registerReplacementScan", &Database::RegisterReplacementScan)});
+         InstanceMethod("registerReplacementScan", &Database::RegisterReplacementScan),
+         InstanceMethod("tokenize", &Database::Tokenize)});
 
     exports.Set("Database", t);
 
@@ -364,4 +366,31 @@ Napi::Value Database::RegisterReplacementScan(const Napi::CallbackInfo &info) {
     return deferred.Promise();
 }
 
+Napi::Value Database::Tokenize(const Napi::CallbackInfo &info) {
+    auto env = info.Env();
+
+    if (info.Length() < 1) {
+        throw Napi::TypeError::New(env, "Text argument expected");
+    }
+
+    std::string text = info[0].As<Napi::String>();
+
+    auto tokens = duckdb::Parser::Tokenize(text);
+    auto numTokens = tokens.size();
+
+    auto offsets = Napi::Array::New(env, numTokens);
+    auto types = Napi::Array::New(env, numTokens);
+
+    for (size_t i = 0; i < numTokens; i++) {
+        auto token = tokens[i];
+        offsets.Set(i, token.start);
+        types.Set(i, (uint8_t)token.type);
+    }
+
+    auto result = Napi::Object::New(env);
+    result.Set("offsets", offsets);
+    result.Set("types", types);
+    return result;
+}
+
 } // namespace node_duckdb
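The binding returns only token start offsets, so a token's text extends from its offset to the next token's offset (or the end of the string). A small sketch of recovering the raw token substrings (the `tokenSpans` helper is hypothetical, not part of the package):

// Hypothetical helper: pair each start offset with the next offset
// (or the end of the text) and slice the input accordingly. trimEnd()
// drops the whitespace between tokens; note a slice may still carry a
// comment that precedes the next token, since comments produce no
// tokens of their own.
function tokenSpans(text: string, tokens: duckdb.ScriptTokens): string[] {
  return tokens.offsets.map((start, i) => {
    const end = i + 1 < tokens.offsets.length ? tokens.offsets[i + 1] : text.length;
    return text.slice(start, end).trimEnd();
  });
}

tokenSpans('select 1;', db.tokenize('select 1;')); // ['select', '1', ';']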
package/src/duckdb_node.cpp CHANGED
@@ -12,15 +12,36 @@ NodeDuckDB::NodeDuckDB(Napi::Env env, Napi::Object exports) {
     statement_constructor = node_duckdb::Statement::Init(env, exports);
     query_result_constructor = node_duckdb::QueryResult::Init(env, exports);
 
-    exports.DefineProperties({
-        DEFINE_CONSTANT_INTEGER(exports, node_duckdb::Database::DUCKDB_NODEJS_ERROR, ERROR) DEFINE_CONSTANT_INTEGER(
+    auto token_type_enum = Napi::Object::New(env);
+
+    token_type_enum.Set("IDENTIFIER", 0);
+    token_type_enum.Set("NUMERIC_CONSTANT", 1);
+    token_type_enum.Set("STRING_CONSTANT", 2);
+    token_type_enum.Set("OPERATOR", 3);
+    token_type_enum.Set("KEYWORD", 4);
+    token_type_enum.Set("COMMENT", 5);
+
+    // TypeScript enums expose an inverse mapping.
+    token_type_enum.Set((uint32_t)0, "IDENTIFIER");
+    token_type_enum.Set((uint32_t)1, "NUMERIC_CONSTANT");
+    token_type_enum.Set((uint32_t)2, "STRING_CONSTANT");
+    token_type_enum.Set((uint32_t)3, "OPERATOR");
+    token_type_enum.Set((uint32_t)4, "KEYWORD");
+    token_type_enum.Set((uint32_t)5, "COMMENT");
+
+    token_type_enum_ref = Napi::ObjectReference::New(token_type_enum);
+
+    exports.DefineProperties(
+        {DEFINE_CONSTANT_INTEGER(exports, node_duckdb::Database::DUCKDB_NODEJS_ERROR, ERROR) DEFINE_CONSTANT_INTEGER(
              exports, node_duckdb::Database::DUCKDB_NODEJS_READONLY, OPEN_READONLY) // same as SQLite
-        DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_READWRITE)   // ignored
-        DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_CREATE)      // ignored
-        DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_FULLMUTEX)   // ignored
-        DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_SHAREDCACHE) // ignored
-        DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_PRIVATECACHE) // ignored
-    });
+         DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_READWRITE)    // ignored
+         DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_CREATE)       // ignored
+         DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_FULLMUTEX)    // ignored
+         DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_SHAREDCACHE)  // ignored
+         DEFINE_CONSTANT_INTEGER(exports, 0, OPEN_PRIVATECACHE) // ignored
+
+         Napi::PropertyDescriptor::Value("TokenType", token_type_enum,
+                                         static_cast<napi_property_attributes>(napi_enumerable | napi_configurable))});
 }
 
 NODE_API_ADDON(NodeDuckDB);
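The two sets of `Set` calls above mirror the shape of a TypeScript numeric enum, which maps names to values and values back to names, so the object registered here satisfies the `TokenType` declaration in duckdb.d.ts. For example (values as defined above):

duckdb.TokenType.KEYWORD; // 4
duckdb.TokenType[4];      // "KEYWORD"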
package/src/duckdb_node.hpp CHANGED
@@ -23,6 +23,7 @@ public:
     Napi::FunctionReference connection_constructor;
     Napi::FunctionReference statement_constructor;
     Napi::FunctionReference query_result_constructor;
+    Napi::ObjectReference token_type_enum_ref;
 };
 
 namespace node_duckdb {
@@ -109,6 +110,7 @@ public:
     Napi::Value Interrupt(const Napi::CallbackInfo &info);
     Napi::Value Close(const Napi::CallbackInfo &info);
     Napi::Value RegisterReplacementScan(const Napi::CallbackInfo &info);
+    Napi::Value Tokenize(const Napi::CallbackInfo &info);
 
 public:
     constexpr static int DUCKDB_NODEJS_ERROR = -1;
package/src/statement.cpp CHANGED
@@ -187,6 +187,15 @@ static Napi::Value convert_col_val(Napi::Env &env, duckdb::Value dval, duckdb::L
         const auto scale = duckdb::Interval::SECS_PER_DAY * duckdb::Interval::MSECS_PER_SEC;
         value = Napi::Date::New(env, double(dval.GetValue<int32_t>() * scale));
     } break;
+    case duckdb::LogicalTypeId::TIMESTAMP_NS: {
+        value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / (duckdb::Interval::MICROS_PER_MSEC * 1000)));
+    } break;
+    case duckdb::LogicalTypeId::TIMESTAMP_MS: {
+        value = Napi::Date::New(env, double(dval.GetValue<int64_t>()));
+    } break;
+    case duckdb::LogicalTypeId::TIMESTAMP_SEC: {
+        value = Napi::Date::New(env, double(dval.GetValue<int64_t>() * duckdb::Interval::MSECS_PER_SEC));
+    } break;
     case duckdb::LogicalTypeId::TIMESTAMP:
     case duckdb::LogicalTypeId::TIMESTAMP_TZ: {
         value = Napi::Date::New(env, double(dval.GetValue<int64_t>() / duckdb::Interval::MICROS_PER_MSEC));
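Each new case rescales the column's int64 payload to the milliseconds a JavaScript `Date` expects: TIMESTAMP_NS divides nanoseconds by 1,000,000 (written as `MICROS_PER_MSEC * 1000`), TIMESTAMP_MS passes the value through, and TIMESTAMP_SEC multiplies seconds by `MSECS_PER_SEC`. A quick sanity check of the scaling, using 1990-01-01T00:00:00Z (631152000 seconds since the Unix epoch; BigInt stands in for the C++ int64 division):

const fromSec = new Date(631152000 * 1000);  // TIMESTAMP_SEC: s -> ms
const fromMs = new Date(631152000000);       // TIMESTAMP_MS: already ms
const fromNs = new Date(Number(631152000000000000n / 1000000n)); // TIMESTAMP_NS: ns -> ms

// All three denote the same instant:
// fromSec.toISOString() === '1990-01-01T00:00:00.000Z'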
@@ -22,10 +22,12 @@ function timedelta(obj: { days: number; micros: number; months: number }) {
 const replacement_values: Record<string, string> = {
   timestamp:
     "'1990-01-01 00:00:00'::TIMESTAMP, '9999-12-31 23:59:59'::TIMESTAMP, NULL::TIMESTAMP",
-  // TODO: fix these, they are currently being returned as strings
-  // timestamp_s: "'1990-01-01 00:00:00'::TIMESTAMP_S",
-  // timestamp_ns: "'1990-01-01 00:00:00'::TIMESTAMP_NS",
-  // timestamp_ms: "'1990-01-01 00:00:00'::TIMESTAMP_MS",
+  timestamp_s:
+    "'1990-01-01 00:00:00'::TIMESTAMP_S, '9999-12-31 23:59:59'::TIMESTAMP_S, NULL::TIMESTAMP_S",
+  // note: timestamp_ns does not support extreme values
+  timestamp_ns: "'1990-01-01 00:00:00'::TIMESTAMP_NS, NULL::TIMESTAMP_NS",
+  timestamp_ms:
+    "'1990-01-01 00:00:00'::TIMESTAMP_MS, '9999-12-31 23:59:59'::TIMESTAMP_MS, NULL::TIMESTAMP_MS",
   timestamp_tz:
     "'1990-01-01 00:00:00Z'::TIMESTAMPTZ, '9999-12-31 23:59:59.999999Z'::TIMESTAMPTZ, NULL::TIMESTAMPTZ",
   date: "'1990-01-01'::DATE, '9999-12-31'::DATE, NULL::DATE",
@@ -157,7 +159,7 @@ const correct_answer_map: Record<string, any[]> = {
     null,
   ],
   map: ["{}", "{key1=🦆🦆🦆🦆🦆🦆, key2=goose}", null],
-  union: ['Frank', '5', null],
+  union: ["Frank", "5", null],
 
   time_tz: ["00:00:00-1559", "23:59:59.999999+1559", null],
   interval: [
@@ -176,16 +178,15 @@ const correct_answer_map: Record<string, any[]> = {
     null,
   ],
   date: [new Date("1990-01-01"), new Date("9999-12-31"), null],
-  timestamp_s: ["290309-12-22 (BC) 00:00:00", "294247-01-10 04:00:54", null],
-
-  timestamp_ns: [
-    "1677-09-21 00:12:43.145225",
-    "2262-04-11 23:47:16.854775",
+  timestamp_s: [
+    new Date(Date.UTC(1990, 0, 1)),
+    new Date("9999-12-31T23:59:59.000Z"),
     null,
   ],
+  timestamp_ns: [new Date(Date.UTC(1990, 0, 1)), null],
   timestamp_ms: [
-    "290309-12-22 (BC) 00:00:00",
-    "294247-01-10 04:00:54.775",
+    new Date(Date.UTC(1990, 0, 1)),
+    new Date("9999-12-31T23:59:59.000Z"),
     null,
   ],
   timestamp_tz: [
@@ -0,0 +1,74 @@
+import * as assert from 'assert';
+import * as duckdb from '..';
+
+describe('tokenize', function () {
+  it('should return correct tokens for a single statement', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize('select 1;');
+    assert.deepStrictEqual(output, {
+      offsets: [0, 7, 8],
+      types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR]
+    });
+  });
+  it('should return correct tokens for multiple statements', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize('select 1; select 2;');
+    assert.deepStrictEqual(output, {
+      offsets: [0, 7, 8, 10, 17, 18],
+      types: [
+        duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR,
+        duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT, duckdb.TokenType.OPERATOR
+      ]
+    });
+  });
+  it('should return no tokens for an empty string', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize('');
+    assert.deepStrictEqual(output, {
+      offsets: [],
+      types: []
+    });
+  });
+  it('should handle quoted semicolons in string constants', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize(`select ';';`);
+    assert.deepStrictEqual(output, {
+      offsets: [0, 7, 10],
+      types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.STRING_CONSTANT, duckdb.TokenType.OPERATOR]
+    });
+  });
+  it('should handle quoted semicolons in identifiers', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize(`from ";";`);
+    assert.deepStrictEqual(output, {
+      offsets: [0, 5, 8],
+      types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.IDENTIFIER, duckdb.TokenType.OPERATOR]
+    });
+  });
+  it('should handle comments', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize(`select /* comment */ 1`);
+    // Note that the tokenizer doesn't return tokens for comments.
+    assert.deepStrictEqual(output, {
+      offsets: [0, 21],
+      types: [duckdb.TokenType.KEYWORD, duckdb.TokenType.NUMERIC_CONSTANT]
+    });
+  });
+  it('should handle invalid syntax', function () {
+    const db = new duckdb.Database(':memory:');
+    const output = db.tokenize(`selec 1`);
+    // The misspelled keyword is scanned as an identifier.
+    assert.deepStrictEqual(output, {
+      offsets: [0, 6],
+      types: [duckdb.TokenType.IDENTIFIER, duckdb.TokenType.NUMERIC_CONSTANT]
+    });
+  });
+  it('should support inverse TokenType mapping', function () {
+    assert.equal(duckdb.TokenType[duckdb.TokenType.IDENTIFIER], "IDENTIFIER");
+    assert.equal(duckdb.TokenType[duckdb.TokenType.NUMERIC_CONSTANT], "NUMERIC_CONSTANT");
+    assert.equal(duckdb.TokenType[duckdb.TokenType.STRING_CONSTANT], "STRING_CONSTANT");
+    assert.equal(duckdb.TokenType[duckdb.TokenType.OPERATOR], "OPERATOR");
+    assert.equal(duckdb.TokenType[duckdb.TokenType.KEYWORD], "KEYWORD");
+    assert.equal(duckdb.TokenType[duckdb.TokenType.COMMENT], "COMMENT");
+  });
+});