@malloydata/db-duckdb 0.0.330 → 0.0.332

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { DuckDBCommon } from './duckdb_common';
2
- import type { Connection, TableData } from 'duckdb';
3
- import { Database } from 'duckdb';
2
+ import { DuckDBInstance } from '@duckdb/node-api';
3
+ import type { DuckDBConnection as DuckDBNodeConnection } from '@duckdb/node-api';
4
4
  import type { ConnectionConfig, QueryDataRow, QueryOptionsReader, RunSQLOptions } from '@malloydata/malloy';
5
5
  export interface DuckDBConnectionOptions extends ConnectionConfig {
6
6
  additionalExtensions?: string[];
@@ -10,8 +10,8 @@ export interface DuckDBConnectionOptions extends ConnectionConfig {
10
10
  readOnly?: boolean;
11
11
  }
12
12
  interface ActiveDB {
13
- database: Database;
14
- connections: Connection[];
13
+ instance: DuckDBInstance;
14
+ connections: DuckDBNodeConnection[];
15
15
  }
16
16
  export declare class DuckDBConnection extends DuckDBCommon {
17
17
  readonly name: string;
@@ -20,7 +20,7 @@ export declare class DuckDBConnection extends DuckDBCommon {
20
20
  private workingDirectory;
21
21
  private readOnly;
22
22
  connecting: Promise<void>;
23
- protected connection: Connection | null;
23
+ protected connection: DuckDBNodeConnection | null;
24
24
  protected setupError: Error | undefined;
25
25
  protected isSetup: Promise<void> | undefined;
26
26
  static activeDBs: Record<string, ActiveDB>;
@@ -30,7 +30,7 @@ export declare class DuckDBConnection extends DuckDBCommon {
30
30
  loadExtension(ext: string): Promise<void>;
31
31
  protected setup(): Promise<void>;
32
32
  protected runDuckDBQuery(sql: string): Promise<{
33
- rows: TableData;
33
+ rows: QueryDataRow[];
34
34
  totalRows: number;
35
35
  }>;
36
36
  runSQLStream(sql: string, { rowLimit, abortSignal }?: RunSQLOptions): AsyncIterableIterator<QueryDataRow>;
@@ -28,7 +28,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
28
28
  exports.DuckDBConnection = void 0;
29
29
  const crypto_1 = __importDefault(require("crypto"));
30
30
  const duckdb_common_1 = require("./duckdb_common");
31
- const duckdb_1 = require("duckdb");
31
+ const node_api_1 = require("@duckdb/node-api");
32
32
  const package_json_1 = __importDefault(require("@malloydata/malloy/package.json"));
33
33
  class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
34
34
  constructor(arg, arg2, workingDirectory, queryOptions) {
@@ -80,24 +80,22 @@ class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
80
80
  this.connecting = this.init();
81
81
  }
82
82
  async init() {
83
- return new Promise(resolve => {
83
+ try {
84
84
  if (this.databasePath in DuckDBConnection.activeDBs) {
85
85
  const activeDB = DuckDBConnection.activeDBs[this.databasePath];
86
- this.connection = activeDB.database.connect();
86
+ this.connection = await activeDB.instance.connect();
87
87
  activeDB.connections.push(this.connection);
88
- resolve();
89
88
  }
90
89
  else {
91
90
  const config = {
92
- 'custom_user_agent': `Malloy/${package_json_1.default.version}`,
91
+ custom_user_agent: `Malloy/${package_json_1.default.version}`,
93
92
  };
94
93
  if (this.isMotherDuck) {
95
94
  if (!this.motherDuckToken &&
96
95
  !process.env['motherduck_token'] &&
97
96
  !process.env['MOTHERDUCK_TOKEN']) {
98
97
  this.setupError = new Error('Please set your MotherDuck Token');
99
- // Resolve instead of error because errors cannot be caught.
100
- return resolve();
98
+ return;
101
99
  }
102
100
  if (this.motherDuckToken) {
103
101
  config['motherduck_token'] = this.motherDuckToken;
@@ -106,23 +104,18 @@ class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
106
104
  if (this.readOnly) {
107
105
  config['access_mode'] = 'READ_ONLY';
108
106
  }
109
- const database = new duckdb_1.Database(this.databasePath, config, err => {
110
- if (err) {
111
- this.setupError = err;
112
- }
113
- else {
114
- this.connection = database.connect();
115
- const activeDB = {
116
- database,
117
- connections: [],
118
- };
119
- DuckDBConnection.activeDBs[this.databasePath] = activeDB;
120
- activeDB.connections.push(this.connection);
121
- }
122
- resolve();
123
- });
107
+ const instance = await node_api_1.DuckDBInstance.create(this.databasePath, config);
108
+ this.connection = await instance.connect();
109
+ const activeDB = {
110
+ instance,
111
+ connections: [this.connection],
112
+ };
113
+ DuckDBConnection.activeDBs[this.databasePath] = activeDB;
124
114
  }
125
- });
115
+ }
116
+ catch (err) {
117
+ this.setupError = err instanceof Error ? err : new Error(String(err));
118
+ }
126
119
  }
127
120
  async loadExtension(ext) {
128
121
  try {
@@ -172,25 +165,16 @@ class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
172
165
  await this.isSetup;
173
166
  }
174
167
  async runDuckDBQuery(sql) {
175
- return new Promise((resolve, reject) => {
176
- if (this.connection) {
177
- this.connection.all(sql, (err, rows) => {
178
- if (err) {
179
- reject(err);
180
- }
181
- else {
182
- // rows = processBigInts(rows);
183
- resolve({
184
- rows,
185
- totalRows: rows.length,
186
- });
187
- }
188
- });
189
- }
190
- else {
191
- reject(new Error('Connection not open'));
192
- }
193
- });
168
+ if (!this.connection) {
169
+ throw new Error('Connection not open');
170
+ }
171
+ const result = await this.connection.run(sql);
172
+ // getRowObjectsJson() converts nested types (LIST, STRUCT) to JS arrays/objects
173
+ const rows = (await result.getRowObjectsJson());
174
+ return {
175
+ rows,
176
+ totalRows: rows.length,
177
+ };
194
178
  }
195
179
  async *runSQLStream(sql, { rowLimit, abortSignal } = {}) {
196
180
  const defaultOptions = this.readQueryOptions();
@@ -204,14 +188,17 @@ class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
204
188
  await this.runDuckDBQuery(statements[0]);
205
189
  statements.shift();
206
190
  }
191
+ const result = await this.connection.stream(statements[0]);
207
192
  let index = 0;
208
- for await (const row of this.connection.stream(statements[0])) {
209
- if ((rowLimit !== undefined && index >= rowLimit) ||
210
- (abortSignal === null || abortSignal === void 0 ? void 0 : abortSignal.aborted)) {
211
- break;
193
+ for await (const chunk of result.yieldRowObjectJson()) {
194
+ for (const row of chunk) {
195
+ if ((rowLimit !== undefined && index >= rowLimit) ||
196
+ (abortSignal === null || abortSignal === void 0 ? void 0 : abortSignal.aborted)) {
197
+ return;
198
+ }
199
+ index++;
200
+ yield row;
212
201
  }
213
- index++;
214
- yield row;
215
202
  }
216
203
  }
217
204
  async createHash(sqlCommand) {
@@ -222,7 +209,7 @@ class DuckDBConnection extends duckdb_common_1.DuckDBCommon {
222
209
  if (activeDB) {
223
210
  activeDB.connections = activeDB.connections.filter(connection => connection !== this.connection);
224
211
  if (activeDB.connections.length === 0) {
225
- activeDB.database.close();
212
+ activeDB.instance.closeSync();
226
213
  delete DuckDBConnection.activeDBs[this.databasePath];
227
214
  }
228
215
  }
@@ -1,24 +1,6 @@
1
1
  import * as duckdb from '@duckdb/duckdb-wasm';
2
2
  import type { FetchSchemaOptions, QueryDataRow, QueryOptionsReader, RunSQLOptions, SQLSourceDef, ConnectionConfig, TableSourceDef, SQLSourceRequest } from '@malloydata/malloy';
3
- import type { StructRow } from 'apache-arrow';
4
3
  import { DuckDBCommon } from './duckdb_common';
5
- /**
6
- * Arrow's toJSON() doesn't really do what I'd expect, since
7
- * it still includes Arrow objects like DecimalBigNums and Vectors,
8
- * so we need this fairly gross function to unwrap those.
9
- *
10
- * @param value Element from an Arrow StructRow.
11
- * @return Vanilla Javascript value
12
- */
13
- export declare const unwrapArrow: (value: unknown) => any;
14
- /**
15
- * Process a single Arrow result row into a Malloy QueryDataRow
16
- * Unfortunately simply calling JSONParse(JSON.stringify(row)) even
17
- * winds up converting DecimalBigNums to strings instead of numbers.
18
- * For some reason a custom replacer only sees DecimalBigNums as
19
- * strings, as well.
20
- */
21
- export declare const unwrapRow: (row: StructRow) => QueryDataRow;
22
4
  type RemoteFileCallback = (tableName: string) => Promise<Uint8Array | undefined>;
23
5
  export interface DuckDBWasmOptions extends ConnectionConfig {
24
6
  additionalExtensions?: string[];
@@ -58,105 +58,173 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
58
58
  return (mod && mod.__esModule) ? mod : { "default": mod };
59
59
  };
60
60
  Object.defineProperty(exports, "__esModule", { value: true });
61
- exports.DuckDBWASMConnection = exports.unwrapRow = exports.unwrapArrow = void 0;
61
+ exports.DuckDBWASMConnection = void 0;
62
62
  const duckdb = __importStar(require("@duckdb/duckdb-wasm"));
63
63
  const web_worker_1 = __importDefault(require("web-worker"));
64
+ const apache_arrow_1 = require("apache-arrow");
64
65
  const duckdb_common_1 = require("./duckdb_common");
65
66
  const TABLE_MATCH = /FROM\s*('([^']*)'|"([^"]*)")/gi;
66
67
  const TABLE_FUNCTION_MATCH = /FROM\s+[a-z0-9_]+\(('([^']*)'|"([^"]*)")/gi;
67
68
  const FILE_EXTS = ['.csv', '.tsv', '.parquet'];
68
- const isIterable = (x) => Symbol.iterator in x;
69
+ // ----------------------------------------------------------------------------
70
+ // Arrow value unwrapping functions
71
+ // These convert Arrow values to vanilla JS using schema type information.
72
+ // ----------------------------------------------------------------------------
69
73
  /**
70
- * Arrow's toJSON() doesn't really do what I'd expect, since
71
- * it still includes Arrow objects like DecimalBigNums and Vectors,
72
- * so we need this fairly gross function to unwrap those.
73
- *
74
- * @param value Element from an Arrow StructRow.
75
- * @return Vanilla Javascript value
74
+ * Convert an Arrow value to vanilla JS using the Arrow DataType.
75
+ * Uses schema type info to correctly handle decimals and nested types.
76
76
  */
77
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
78
- const unwrapArrow = (value) => {
79
- if (value === null) {
80
- return value;
81
- }
82
- else if (value instanceof Date) {
83
- return value;
84
- }
85
- else if (typeof value === 'bigint') {
86
- // Safe bigints can be represented as numbers without precision loss
87
- if (value >= BigInt(Number.MIN_SAFE_INTEGER) &&
88
- value <= BigInt(Number.MAX_SAFE_INTEGER)) {
89
- return Number(value);
90
- }
91
- // Large bigints stay as strings to preserve precision
92
- return value.toString();
77
+ function unwrapValue(value, fieldType) {
78
+ if (value === null || value === undefined) {
79
+ return null;
93
80
  }
94
- else if (typeof value === 'object') {
95
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
96
- const obj = value;
97
- // DecimalBigNums appear as Uint32Arrays, but can be identified
98
- // because they have a Symbol.toPrimitive method
99
- if (obj[Symbol.toPrimitive]) {
100
- const primitiveValue = obj[Symbol.toPrimitive]();
101
- if (typeof primitiveValue === 'string') {
102
- const num = Number(primitiveValue);
103
- // Safe integers can be represented as numbers without precision loss
104
- if (Number.isSafeInteger(num)) {
105
- return num;
106
- }
107
- // Large numbers stay as strings to preserve precision (HUGEINT)
108
- return primitiveValue;
81
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
82
+ const children = fieldType.children;
83
+ switch (fieldType.typeId) {
84
+ case apache_arrow_1.Type.Decimal:
85
+ return unwrapDecimal(value, fieldType);
86
+ case apache_arrow_1.Type.Date:
87
+ case apache_arrow_1.Type.Timestamp:
88
+ if (typeof value === 'number') {
89
+ return new Date(value);
109
90
  }
110
- if (typeof primitiveValue === 'number') {
111
- return primitiveValue;
91
+ if (value instanceof Date) {
92
+ return value;
112
93
  }
113
- if (typeof primitiveValue === 'bigint') {
114
- if (primitiveValue >= BigInt(Number.MIN_SAFE_INTEGER) &&
115
- primitiveValue <= BigInt(Number.MAX_SAFE_INTEGER)) {
116
- return Number(primitiveValue);
117
- }
118
- return primitiveValue.toString();
94
+ return unwrapPrimitive(value);
95
+ case apache_arrow_1.Type.List:
96
+ case apache_arrow_1.Type.FixedSizeList:
97
+ if (children && children.length > 0) {
98
+ return unwrapArray(value, children[0].type);
119
99
  }
120
- return primitiveValue.toString();
121
- }
122
- else if (Array.isArray(value)) {
123
- return value.map(exports.unwrapArrow);
100
+ return unwrapPrimitive(value);
101
+ case apache_arrow_1.Type.Struct:
102
+ if (children && children.length > 0) {
103
+ return unwrapStruct(value, children);
104
+ }
105
+ return unwrapPrimitive(value);
106
+ case apache_arrow_1.Type.Map:
107
+ // Maps have a single child which is a struct with key/value fields
108
+ if (children && children.length > 0) {
109
+ return unwrapArray(value, children[0].type);
110
+ }
111
+ return unwrapPrimitive(value);
112
+ default:
113
+ return unwrapPrimitive(value);
114
+ }
115
+ }
116
+ function unwrapDecimal(value, fieldType) {
117
+ var _a;
118
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
119
+ const scale = (_a = fieldType.scale) !== null && _a !== void 0 ? _a : 0;
120
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
121
+ const obj = value;
122
+ if (!obj || !obj[Symbol.toPrimitive]) {
123
+ return value;
124
+ }
125
+ const raw = obj[Symbol.toPrimitive]();
126
+ if (typeof raw === 'bigint') {
127
+ // Check if the unscaled value exceeds safe integer range
128
+ const absRaw = raw < BigInt(0) ? -raw : raw;
129
+ if (absRaw > BigInt(Number.MAX_SAFE_INTEGER)) {
130
+ // Too large for precise JS number - format as decimal string
131
+ return formatBigDecimal(raw, scale);
124
132
  }
125
- else if (isIterable(value)) {
126
- // Catch Arrow Vector objects
127
- return [...value].map(exports.unwrapArrow);
133
+ if (scale > 0) {
134
+ return Number(raw) / 10 ** scale;
128
135
  }
129
- else {
130
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
131
- const result = {};
132
- for (const key in obj) {
133
- if (Object.prototype.hasOwnProperty.call(obj, key)) {
134
- result[key] = (0, exports.unwrapArrow)(obj[key]);
135
- }
136
- }
137
- return result;
136
+ return Number(raw);
137
+ }
138
+ if (typeof raw === 'string') {
139
+ // Large decimals may come as strings - check if too large for Number
140
+ const absStr = raw.startsWith('-') ? raw.slice(1) : raw;
141
+ if (absStr.length > 15) {
142
+ // String is likely too large for precise Number - format with decimal
143
+ return formatBigDecimalFromString(raw, scale);
138
144
  }
139
145
  }
146
+ const num = Number(raw);
147
+ return scale > 0 ? num / 10 ** scale : num;
148
+ }
149
+ function unwrapArray(value, elementType) {
150
+ const arr = Array.isArray(value) ? value : [...value];
151
+ return arr.map(v => unwrapValue(v, elementType));
152
+ }
153
+ function unwrapStruct(value, children) {
154
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
155
+ const obj = value;
156
+ const result = {};
157
+ for (const field of children) {
158
+ result[field.name] = unwrapValue(obj[field.name], field.type);
159
+ }
160
+ return result;
161
+ }
162
+ function unwrapPrimitive(value) {
163
+ if (value instanceof Date)
164
+ return value;
165
+ if (typeof value === 'bigint')
166
+ return safeNumber(value);
167
+ if (typeof value !== 'object' || value === null)
168
+ return value;
169
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
170
+ const obj = value;
171
+ if (obj[Symbol.toPrimitive]) {
172
+ return safeNumber(obj[Symbol.toPrimitive]());
173
+ }
140
174
  return value;
141
- };
142
- exports.unwrapArrow = unwrapArrow;
175
+ }
176
+ function safeNumber(value) {
177
+ if (typeof value === 'number') {
178
+ return value;
179
+ }
180
+ const num = Number(value);
181
+ if (Number.isSafeInteger(num) ||
182
+ (Number.isFinite(num) && !Number.isInteger(num))) {
183
+ return num;
184
+ }
185
+ return String(value);
186
+ }
187
+ function formatBigDecimal(raw, scale) {
188
+ const isNegative = raw < BigInt(0);
189
+ const str = (isNegative ? -raw : raw).toString();
190
+ return formatDecimalString(str, scale, isNegative);
191
+ }
192
+ function formatBigDecimalFromString(raw, scale) {
193
+ const isNegative = raw.startsWith('-');
194
+ const str = isNegative ? raw.slice(1) : raw;
195
+ return formatDecimalString(str, scale, isNegative);
196
+ }
197
+ function formatDecimalString(str, scale, isNegative) {
198
+ let result;
199
+ if (scale <= 0) {
200
+ result = str;
201
+ }
202
+ else if (scale >= str.length) {
203
+ result = '0.' + '0'.repeat(scale - str.length) + str;
204
+ }
205
+ else {
206
+ result = str.slice(0, -scale) + '.' + str.slice(-scale);
207
+ }
208
+ return isNegative ? '-' + result : result;
209
+ }
143
210
  /**
144
- * Process a single Arrow result row into a Malloy QueryDataRow
145
- * Unfortunately simply calling JSONParse(JSON.stringify(row)) even
146
- * winds up converting DecimalBigNums to strings instead of numbers.
147
- * For some reason a custom replacer only sees DecimalBigNums as
148
- * strings, as well.
211
+ * Process a single Arrow result row into a Malloy QueryDataRow.
149
212
  */
150
- const unwrapRow = (row) => {
151
- return (0, exports.unwrapArrow)(row.toJSON());
152
- };
153
- exports.unwrapRow = unwrapRow;
213
+ function unwrapRow(row, schema) {
214
+ const json = row.toJSON();
215
+ const result = {};
216
+ for (const field of schema.fields) {
217
+ // Cast is safe: unwrapValue returns QueryValue-compatible types
218
+ result[field.name] = unwrapValue(json[field.name], field.type);
219
+ }
220
+ return result;
221
+ }
154
222
  /**
155
- * Process a duckedb Table into an array of Malloy QueryDataRows
223
+ * Process a DuckDB Table into an array of Malloy QueryDataRows.
156
224
  */
157
- const unwrapTable = (table) => {
158
- return table.toArray().map(exports.unwrapRow);
159
- };
225
+ function unwrapTable(table) {
226
+ return table.toArray().map(row => unwrapRow(row, table.schema));
227
+ }
160
228
  const isNode = () => typeof navigator === 'undefined';
161
229
  class DuckDBWASMConnection extends duckdb_common_1.DuckDBCommon {
162
230
  constructor(arg, arg2, workingDirectory, queryOptions) {
@@ -224,6 +292,7 @@ class DuckDBWASMConnection extends duckdb_common_1.DuckDBCommon {
224
292
  if (this.databasePath) {
225
293
  await this._database.open({
226
294
  path: this.databasePath,
295
+ accessMode: duckdb.DuckDBAccessMode.AUTOMATIC,
227
296
  });
228
297
  }
229
298
  URL.revokeObjectURL(workerUrl);
@@ -329,7 +398,7 @@ class DuckDBWASMConnection extends duckdb_common_1.DuckDBCommon {
329
398
  (abortSignal === null || abortSignal === void 0 ? void 0 : abortSignal.aborted)) {
330
399
  break;
331
400
  }
332
- yield (0, exports.unwrapRow)(row);
401
+ yield unwrapRow(row, chunk.schema);
333
402
  index++;
334
403
  }
335
404
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@malloydata/db-duckdb",
3
- "version": "0.0.330",
3
+ "version": "0.0.332",
4
4
  "license": "MIT",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -44,11 +44,11 @@
44
44
  "prepublishOnly": "npm run build"
45
45
  },
46
46
  "dependencies": {
47
- "@duckdb/duckdb-wasm": "1.29.1-dev132.0",
48
- "@malloydata/malloy": "0.0.330",
47
+ "@duckdb/duckdb-wasm": "1.33.1-dev13.0",
48
+ "@malloydata/malloy": "0.0.332",
49
49
  "@motherduck/wasm-client": "^0.6.6",
50
50
  "apache-arrow": "^17.0.0",
51
- "duckdb": "1.3.4",
51
+ "@duckdb/node-api": "1.4.3-r.1",
52
52
  "web-worker": "^1.3.0"
53
53
  }
54
54
  }