@malloydata/db-duckdb 0.0.330 → 0.0.331
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,24 +1,6 @@
|
|
|
1
1
|
import * as duckdb from '@duckdb/duckdb-wasm';
|
|
2
2
|
import type { FetchSchemaOptions, QueryDataRow, QueryOptionsReader, RunSQLOptions, SQLSourceDef, ConnectionConfig, TableSourceDef, SQLSourceRequest } from '@malloydata/malloy';
|
|
3
|
-
import type { StructRow } from 'apache-arrow';
|
|
4
3
|
import { DuckDBCommon } from './duckdb_common';
|
|
5
|
-
/**
|
|
6
|
-
* Arrow's toJSON() doesn't really do what I'd expect, since
|
|
7
|
-
* it still includes Arrow objects like DecimalBigNums and Vectors,
|
|
8
|
-
* so we need this fairly gross function to unwrap those.
|
|
9
|
-
*
|
|
10
|
-
* @param value Element from an Arrow StructRow.
|
|
11
|
-
* @return Vanilla Javascript value
|
|
12
|
-
*/
|
|
13
|
-
export declare const unwrapArrow: (value: unknown) => any;
|
|
14
|
-
/**
|
|
15
|
-
* Process a single Arrow result row into a Malloy QueryDataRow
|
|
16
|
-
* Unfortunately simply calling JSONParse(JSON.stringify(row)) even
|
|
17
|
-
* winds up converting DecimalBigNums to strings instead of numbers.
|
|
18
|
-
* For some reason a custom replacer only sees DecimalBigNums as
|
|
19
|
-
* strings, as well.
|
|
20
|
-
*/
|
|
21
|
-
export declare const unwrapRow: (row: StructRow) => QueryDataRow;
|
|
22
4
|
type RemoteFileCallback = (tableName: string) => Promise<Uint8Array | undefined>;
|
|
23
5
|
export interface DuckDBWasmOptions extends ConnectionConfig {
|
|
24
6
|
additionalExtensions?: string[];
|
|
@@ -58,105 +58,173 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
58
58
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
59
59
|
};
|
|
60
60
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
61
|
-
exports.DuckDBWASMConnection =
|
|
61
|
+
exports.DuckDBWASMConnection = void 0;
|
|
62
62
|
const duckdb = __importStar(require("@duckdb/duckdb-wasm"));
|
|
63
63
|
const web_worker_1 = __importDefault(require("web-worker"));
|
|
64
|
+
const apache_arrow_1 = require("apache-arrow");
|
|
64
65
|
const duckdb_common_1 = require("./duckdb_common");
|
|
65
66
|
const TABLE_MATCH = /FROM\s*('([^']*)'|"([^"]*)")/gi;
|
|
66
67
|
const TABLE_FUNCTION_MATCH = /FROM\s+[a-z0-9_]+\(('([^']*)'|"([^"]*)")/gi;
|
|
67
68
|
const FILE_EXTS = ['.csv', '.tsv', '.parquet'];
|
|
68
|
-
|
|
69
|
+
// ----------------------------------------------------------------------------
|
|
70
|
+
// Arrow value unwrapping functions
|
|
71
|
+
// These convert Arrow values to vanilla JS using schema type information.
|
|
72
|
+
// ----------------------------------------------------------------------------
|
|
69
73
|
/**
|
|
70
|
-
* Arrow's toJSON() doesn't really do what I'd expect, since
|
|
71
|
-
* it still includes Arrow objects like DecimalBigNums and Vectors,
|
|
72
|
-
* so we need this fairly gross function to unwrap those.
|
|
73
|
-
*
|
|
74
|
-
* @param value Element from an Arrow StructRow.
|
|
75
|
-
* @return Vanilla Javascript value
|
|
74
|
+
* Convert an Arrow value to vanilla JS using the Arrow DataType.
|
|
75
|
+
* Uses schema type info to correctly handle decimals and nested types.
|
|
76
76
|
*/
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
return value;
|
|
81
|
-
}
|
|
82
|
-
else if (value instanceof Date) {
|
|
83
|
-
return value;
|
|
84
|
-
}
|
|
85
|
-
else if (typeof value === 'bigint') {
|
|
86
|
-
// Safe bigints can be represented as numbers without precision loss
|
|
87
|
-
if (value >= BigInt(Number.MIN_SAFE_INTEGER) &&
|
|
88
|
-
value <= BigInt(Number.MAX_SAFE_INTEGER)) {
|
|
89
|
-
return Number(value);
|
|
90
|
-
}
|
|
91
|
-
// Large bigints stay as strings to preserve precision
|
|
92
|
-
return value.toString();
|
|
77
|
+
function unwrapValue(value, fieldType) {
|
|
78
|
+
if (value === null || value === undefined) {
|
|
79
|
+
return null;
|
|
93
80
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if (typeof
|
|
102
|
-
|
|
103
|
-
// Safe integers can be represented as numbers without precision loss
|
|
104
|
-
if (Number.isSafeInteger(num)) {
|
|
105
|
-
return num;
|
|
106
|
-
}
|
|
107
|
-
// Large numbers stay as strings to preserve precision (HUGEINT)
|
|
108
|
-
return primitiveValue;
|
|
81
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
82
|
+
const children = fieldType.children;
|
|
83
|
+
switch (fieldType.typeId) {
|
|
84
|
+
case apache_arrow_1.Type.Decimal:
|
|
85
|
+
return unwrapDecimal(value, fieldType);
|
|
86
|
+
case apache_arrow_1.Type.Date:
|
|
87
|
+
case apache_arrow_1.Type.Timestamp:
|
|
88
|
+
if (typeof value === 'number') {
|
|
89
|
+
return new Date(value);
|
|
109
90
|
}
|
|
110
|
-
if (
|
|
111
|
-
return
|
|
91
|
+
if (value instanceof Date) {
|
|
92
|
+
return value;
|
|
112
93
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return primitiveValue.toString();
|
|
94
|
+
return unwrapPrimitive(value);
|
|
95
|
+
case apache_arrow_1.Type.List:
|
|
96
|
+
case apache_arrow_1.Type.FixedSizeList:
|
|
97
|
+
if (children && children.length > 0) {
|
|
98
|
+
return unwrapArray(value, children[0].type);
|
|
119
99
|
}
|
|
120
|
-
return
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
100
|
+
return unwrapPrimitive(value);
|
|
101
|
+
case apache_arrow_1.Type.Struct:
|
|
102
|
+
if (children && children.length > 0) {
|
|
103
|
+
return unwrapStruct(value, children);
|
|
104
|
+
}
|
|
105
|
+
return unwrapPrimitive(value);
|
|
106
|
+
case apache_arrow_1.Type.Map:
|
|
107
|
+
// Maps have a single child which is a struct with key/value fields
|
|
108
|
+
if (children && children.length > 0) {
|
|
109
|
+
return unwrapArray(value, children[0].type);
|
|
110
|
+
}
|
|
111
|
+
return unwrapPrimitive(value);
|
|
112
|
+
default:
|
|
113
|
+
return unwrapPrimitive(value);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
function unwrapDecimal(value, fieldType) {
|
|
117
|
+
var _a;
|
|
118
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
119
|
+
const scale = (_a = fieldType.scale) !== null && _a !== void 0 ? _a : 0;
|
|
120
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
121
|
+
const obj = value;
|
|
122
|
+
if (!obj || !obj[Symbol.toPrimitive]) {
|
|
123
|
+
return value;
|
|
124
|
+
}
|
|
125
|
+
const raw = obj[Symbol.toPrimitive]();
|
|
126
|
+
if (typeof raw === 'bigint') {
|
|
127
|
+
// Check if the unscaled value exceeds safe integer range
|
|
128
|
+
const absRaw = raw < BigInt(0) ? -raw : raw;
|
|
129
|
+
if (absRaw > BigInt(Number.MAX_SAFE_INTEGER)) {
|
|
130
|
+
// Too large for precise JS number - format as decimal string
|
|
131
|
+
return formatBigDecimal(raw, scale);
|
|
124
132
|
}
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
return [...value].map(exports.unwrapArrow);
|
|
133
|
+
if (scale > 0) {
|
|
134
|
+
return Number(raw) / 10 ** scale;
|
|
128
135
|
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
return result;
|
|
136
|
+
return Number(raw);
|
|
137
|
+
}
|
|
138
|
+
if (typeof raw === 'string') {
|
|
139
|
+
// Large decimals may come as strings - check if too large for Number
|
|
140
|
+
const absStr = raw.startsWith('-') ? raw.slice(1) : raw;
|
|
141
|
+
if (absStr.length > 15) {
|
|
142
|
+
// String is likely too large for precise Number - format with decimal
|
|
143
|
+
return formatBigDecimalFromString(raw, scale);
|
|
138
144
|
}
|
|
139
145
|
}
|
|
146
|
+
const num = Number(raw);
|
|
147
|
+
return scale > 0 ? num / 10 ** scale : num;
|
|
148
|
+
}
|
|
149
|
+
function unwrapArray(value, elementType) {
|
|
150
|
+
const arr = Array.isArray(value) ? value : [...value];
|
|
151
|
+
return arr.map(v => unwrapValue(v, elementType));
|
|
152
|
+
}
|
|
153
|
+
function unwrapStruct(value, children) {
|
|
154
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
155
|
+
const obj = value;
|
|
156
|
+
const result = {};
|
|
157
|
+
for (const field of children) {
|
|
158
|
+
result[field.name] = unwrapValue(obj[field.name], field.type);
|
|
159
|
+
}
|
|
160
|
+
return result;
|
|
161
|
+
}
|
|
162
|
+
function unwrapPrimitive(value) {
|
|
163
|
+
if (value instanceof Date)
|
|
164
|
+
return value;
|
|
165
|
+
if (typeof value === 'bigint')
|
|
166
|
+
return safeNumber(value);
|
|
167
|
+
if (typeof value !== 'object' || value === null)
|
|
168
|
+
return value;
|
|
169
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
170
|
+
const obj = value;
|
|
171
|
+
if (obj[Symbol.toPrimitive]) {
|
|
172
|
+
return safeNumber(obj[Symbol.toPrimitive]());
|
|
173
|
+
}
|
|
140
174
|
return value;
|
|
141
|
-
}
|
|
142
|
-
|
|
175
|
+
}
|
|
176
|
+
function safeNumber(value) {
|
|
177
|
+
if (typeof value === 'number') {
|
|
178
|
+
return value;
|
|
179
|
+
}
|
|
180
|
+
const num = Number(value);
|
|
181
|
+
if (Number.isSafeInteger(num) ||
|
|
182
|
+
(Number.isFinite(num) && !Number.isInteger(num))) {
|
|
183
|
+
return num;
|
|
184
|
+
}
|
|
185
|
+
return String(value);
|
|
186
|
+
}
|
|
187
|
+
function formatBigDecimal(raw, scale) {
|
|
188
|
+
const isNegative = raw < BigInt(0);
|
|
189
|
+
const str = (isNegative ? -raw : raw).toString();
|
|
190
|
+
return formatDecimalString(str, scale, isNegative);
|
|
191
|
+
}
|
|
192
|
+
function formatBigDecimalFromString(raw, scale) {
|
|
193
|
+
const isNegative = raw.startsWith('-');
|
|
194
|
+
const str = isNegative ? raw.slice(1) : raw;
|
|
195
|
+
return formatDecimalString(str, scale, isNegative);
|
|
196
|
+
}
|
|
197
|
+
function formatDecimalString(str, scale, isNegative) {
|
|
198
|
+
let result;
|
|
199
|
+
if (scale <= 0) {
|
|
200
|
+
result = str;
|
|
201
|
+
}
|
|
202
|
+
else if (scale >= str.length) {
|
|
203
|
+
result = '0.' + '0'.repeat(scale - str.length) + str;
|
|
204
|
+
}
|
|
205
|
+
else {
|
|
206
|
+
result = str.slice(0, -scale) + '.' + str.slice(-scale);
|
|
207
|
+
}
|
|
208
|
+
return isNegative ? '-' + result : result;
|
|
209
|
+
}
|
|
143
210
|
/**
|
|
144
|
-
* Process a single Arrow result row into a Malloy QueryDataRow
|
|
145
|
-
* Unfortunately simply calling JSONParse(JSON.stringify(row)) even
|
|
146
|
-
* winds up converting DecimalBigNums to strings instead of numbers.
|
|
147
|
-
* For some reason a custom replacer only sees DecimalBigNums as
|
|
148
|
-
* strings, as well.
|
|
211
|
+
* Process a single Arrow result row into a Malloy QueryDataRow.
|
|
149
212
|
*/
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
};
|
|
153
|
-
|
|
213
|
+
function unwrapRow(row, schema) {
|
|
214
|
+
const json = row.toJSON();
|
|
215
|
+
const result = {};
|
|
216
|
+
for (const field of schema.fields) {
|
|
217
|
+
// Cast is safe: unwrapValue returns QueryValue-compatible types
|
|
218
|
+
result[field.name] = unwrapValue(json[field.name], field.type);
|
|
219
|
+
}
|
|
220
|
+
return result;
|
|
221
|
+
}
|
|
154
222
|
/**
|
|
155
|
-
* Process a
|
|
223
|
+
* Process a DuckDB Table into an array of Malloy QueryDataRows.
|
|
156
224
|
*/
|
|
157
|
-
|
|
158
|
-
return table.toArray().map(
|
|
159
|
-
}
|
|
225
|
+
function unwrapTable(table) {
|
|
226
|
+
return table.toArray().map(row => unwrapRow(row, table.schema));
|
|
227
|
+
}
|
|
160
228
|
const isNode = () => typeof navigator === 'undefined';
|
|
161
229
|
class DuckDBWASMConnection extends duckdb_common_1.DuckDBCommon {
|
|
162
230
|
constructor(arg, arg2, workingDirectory, queryOptions) {
|
|
@@ -329,7 +397,7 @@ class DuckDBWASMConnection extends duckdb_common_1.DuckDBCommon {
|
|
|
329
397
|
(abortSignal === null || abortSignal === void 0 ? void 0 : abortSignal.aborted)) {
|
|
330
398
|
break;
|
|
331
399
|
}
|
|
332
|
-
yield (
|
|
400
|
+
yield unwrapRow(row, chunk.schema);
|
|
333
401
|
index++;
|
|
334
402
|
}
|
|
335
403
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@malloydata/db-duckdb",
|
|
3
|
-
"version": "0.0.330",
|
|
3
|
+
"version": "0.0.331",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
47
|
"@duckdb/duckdb-wasm": "1.29.1-dev132.0",
|
|
48
|
-
"@malloydata/malloy": "0.0.330",
|
|
48
|
+
"@malloydata/malloy": "0.0.331",
|
|
49
49
|
"@motherduck/wasm-client": "^0.6.6",
|
|
50
50
|
"apache-arrow": "^17.0.0",
|
|
51
51
|
"duckdb": "1.3.4",
|