@visactor/vquery 0.1.45 → 0.1.46

This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,16 @@
1
+ import { DataSourceType, DataSourceValue } from '../types';
2
+ export declare class DataSourceBuilder {
3
+ private type;
4
+ private value;
5
+ constructor(type: DataSourceType, value: DataSourceValue);
6
+ static from(type: DataSourceType, value: DataSourceValue): DataSourceBuilder;
7
+ build(): Promise<{
8
+ type: DataSourceType;
9
+ blob: Blob;
10
+ }>;
11
+ /**
12
+ * Convert the different supported input types to a Blob
13
+ */
14
+ private static convertToBlob;
15
+ private static fetchBlob;
16
+ }
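The declaration above is the new DataSourceBuilder surface, which is re-exported from the package root. A minimal usage sketch based only on these signatures; the CSV URL is illustrative, not part of the package:

import { DataSourceBuilder } from '@visactor/vquery';

// Inside an async context; build() normalizes the input to a { type, blob } pair,
// fetching it when the value is an http(s) or data: URL.
const source = await DataSourceBuilder.from('csv', 'https://example.com/sales.csv').build();
console.log(source.type);       // 'csv'
console.log(source.blob.size);  // byte length of the fetched CSV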
@@ -0,0 +1 @@
1
+ export { DataSourceBuilder } from './dataSourceBuilder';
@@ -0,0 +1,19 @@
1
+ import { DuckDB } from './db/duckDb';
2
+ export declare class Dataset {
3
+ private duckDB;
4
+ private _datasetId;
5
+ private _tableName;
6
+ constructor(duckDB: DuckDB, datasetId: string, tableName: string);
7
+ queryBySQL(sql: string): Promise<{
8
+ performance: {
9
+ startAt: string;
10
+ endAt: string;
11
+ duration: number;
12
+ };
13
+ dataset: any[];
14
+ table: any;
15
+ }>;
16
+ disConnect(): Promise<void>;
17
+ get datasetId(): string;
18
+ get tableName(): string;
19
+ }
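Dataset is the handle returned by VQuery.connectDataset (see the vquery.d.ts diff below); it is not constructed directly. A sketch of queryBySQL, assuming a dataset connected under the illustrative id 'sales-2024':

// `ds` is a Dataset obtained from connectDataset('sales-2024'); the DuckDB view shares the dataset id.
const { dataset, performance } = await ds.queryBySQL('SELECT COUNT(*) AS n FROM "sales-2024"');
console.log(dataset[0].n, `${performance.duration} ms (${performance.startAt} -> ${performance.endAt})`);
await ds.disConnect();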
@@ -1,4 +1,4 @@
1
- import { QueryResult } from '../types';
1
+ import { QueryResult } from '../types/DataSet';
2
2
  export declare class DuckDB {
3
3
  private db;
4
4
  private connection;
@@ -16,7 +16,7 @@ export declare class DuckDB {
16
16
  * @param fileName file name
17
17
  * @param source file contents
18
18
  */
19
- writeFile: <T extends string | ArrayBuffer | Uint8Array | Blob>(fileName: string, source: T) => Promise<void>;
19
+ writeFile: <T extends Blob>(fileName: string, source: T) => Promise<void>;
20
20
  /**
21
21
  * @description Execute a SQL query
22
22
  * @param sql SQL statement
@@ -26,11 +26,6 @@ export declare class DuckDB {
26
26
  dataset: any[];
27
27
  table: any;
28
28
  }>;
29
- /**
30
- * Ensure the file exists; if it does not, create a temporary table from the file of the same name
31
- * @param fileName file name
32
- */
33
- private ensureSchema;
34
29
  /**
35
30
  * @description Get the file's schema
36
31
  * @param fileName file name
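Note that writeFile is narrowed to accept only a Blob; callers that previously passed a string URL, ArrayBuffer, or Uint8Array must normalize first. A sketch, where duckDB stands for the package's internal DuckDB wrapper (not part of the public API):

// 0.1.45: writeFile('demo.csv', 'https://example.com/demo.csv') was fetched internally.
// 0.1.46: wrap the data in a Blob up front, e.g. via DataSourceBuilder or manually:
const csvBlob = new Blob(['region,revenue\nEMEA,1200\n'], { type: 'text/csv' });
await duckDB.writeFile('demo.csv', csvBlob);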
@@ -1,11 +1,21 @@
1
+ import { DataSource } from '../types';
2
+ import { DatasetSchema } from '../types/DataSet';
1
3
  export declare class IndexedDB {
2
4
  private db;
3
5
  private dbName;
4
- private storeName;
6
+ private datasetStoreName;
5
7
  constructor(dbName: string);
6
8
  open: () => Promise<void>;
7
9
  close: () => void;
8
- writeFile: (fileName: string, data: Blob) => Promise<void>;
9
- readFile: (fileName: string) => Promise<Blob | null>;
10
- listFiles: () => Promise<string[]>;
10
+ writeDataset: (datasetId: string, dataSource: DataSource, datasetSchema: DatasetSchema) => Promise<void>;
11
+ readDataset: (datasetId: string) => Promise<{
12
+ dataSource: DataSource;
13
+ datasetSchema: DatasetSchema;
14
+ } | null>;
15
+ deleteDataset: (datasetId: string) => Promise<void>;
16
+ listDatasets: () => Promise<{
17
+ datasetId: string;
18
+ dataSource: DataSource;
19
+ datasetSchema: DatasetSchema;
20
+ }[]>;
11
21
  }
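The IndexedDB wrapper now stores whole dataset records keyed by datasetId (object store 'vqueryDatasets', database version bumped to 2) instead of raw file blobs. The class itself stays internal to the bundle; the sketch below only illustrates the new record shapes, with illustrative ids and schema:

const idb = new IndexedDB('vquery');
await idb.open();
await idb.writeDataset('sales-2024', { type: 'csv', blob: csvBlob }, {
  datasetId: 'sales-2024',
  datasetAlias: 'sales',
  columns: [
    { name: 'region', type: 'string' },
    { name: 'revenue', type: 'number' }
  ]
});
const record = await idb.readDataset('sales-2024'); // { dataSource, datasetSchema } | null
const all = await idb.listDatasets();               // full records, not just key names
await idb.deleteDataset('sales-2024');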
package/dist/index.cjs CHANGED
@@ -27,8 +27,42 @@ var __webpack_require__ = {};
27
27
  var __webpack_exports__ = {};
28
28
  __webpack_require__.r(__webpack_exports__);
29
29
  __webpack_require__.d(__webpack_exports__, {
30
+ DataSourceBuilder: ()=>DataSourceBuilder,
31
+ isHttpUrl: ()=>isHttpUrl,
32
+ isBase64Url: ()=>isBase64Url,
33
+ isUrl: ()=>isUrl,
30
34
  VQuery: ()=>VQuery
31
35
  });
36
+ class Dataset {
37
+ duckDB;
38
+ _datasetId;
39
+ _tableName;
40
+ constructor(duckDB, datasetId, tableName){
41
+ this.duckDB = duckDB;
42
+ this._datasetId = datasetId;
43
+ this._tableName = tableName;
44
+ }
45
+ async queryBySQL(sql) {
46
+ const start = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
47
+ const result = await this.duckDB.query(sql);
48
+ const end = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
49
+ return {
50
+ ...result,
51
+ performance: {
52
+ startAt: start,
53
+ endAt: end,
54
+ duration: Number(end) - Number(start)
55
+ }
56
+ };
57
+ }
58
+ async disConnect() {}
59
+ get datasetId() {
60
+ return this._datasetId;
61
+ }
62
+ get tableName() {
63
+ return this._tableName;
64
+ }
65
+ }
32
66
  const duckdb_wasm_namespaceObject = require("@duckdb/duckdb-wasm");
33
67
  class DuckDB {
34
68
  db = null;
@@ -71,16 +105,10 @@ class DuckDB {
71
105
  writeFile = async (fileName, source)=>{
72
106
  if (!this.db) throw new Error('db is null');
73
107
  let uint8Array;
74
- if ('string' == typeof source) {
75
- const response = await fetch(source);
76
- const buffer = await response.arrayBuffer();
77
- uint8Array = new Uint8Array(buffer);
78
- } else if (source instanceof Blob) {
108
+ if (source instanceof Blob) {
79
109
  const buffer = await source.arrayBuffer();
80
110
  uint8Array = new Uint8Array(buffer);
81
- } else if (source instanceof ArrayBuffer) uint8Array = new Uint8Array(source);
82
- else if (source instanceof Uint8Array) uint8Array = source;
83
- else throw new Error('Unsupported source type');
111
+ } else throw new Error('Unsupported source type');
84
112
  await this.db.registerFileBuffer(fileName, uint8Array);
85
113
  };
86
114
  query = async (sql)=>{
@@ -92,13 +120,8 @@ class DuckDB {
92
120
  table
93
121
  };
94
122
  };
95
- ensureSchema = async (fileName)=>{
96
- if (!this.connection) throw new Error('connection is null');
97
- await this.connection.query(`CREATE TEMP TABLE IF NOT EXISTS "${fileName}" AS SELECT * FROM read_csv_auto('${fileName}')`);
98
- };
99
123
  getSchema = async (fileName)=>{
100
124
  if (!this.connection) throw new Error('connection is null');
101
- await this.ensureSchema(fileName);
102
125
  const result = await this.connection.query(`PRAGMA table_info('${fileName}')`);
103
126
  return result.toArray().map((row)=>row.toJSON());
104
127
  };
@@ -106,16 +129,16 @@ class DuckDB {
106
129
  class IndexedDB {
107
130
  db = null;
108
131
  dbName;
109
- storeName = 'vqueryFiles';
132
+ datasetStoreName = 'vqueryDatasets';
110
133
  constructor(dbName){
111
134
  this.dbName = dbName;
112
135
  }
113
136
  open = ()=>new Promise((resolve, reject)=>{
114
- const request = indexedDB.open(this.dbName, 1);
137
+ const request = indexedDB.open(this.dbName, 2);
115
138
  request.onupgradeneeded = (event)=>{
116
139
  const db = event.target.result;
117
- if (!db.objectStoreNames.contains(this.storeName)) db.createObjectStore(this.storeName, {
118
- keyPath: 'name'
140
+ if (!db.objectStoreNames.contains(this.datasetStoreName)) db.createObjectStore(this.datasetStoreName, {
141
+ keyPath: 'datasetId'
119
142
  });
120
143
  };
121
144
  request.onsuccess = (event)=>{
@@ -132,15 +155,16 @@ class IndexedDB {
132
155
  this.db = null;
133
156
  }
134
157
  };
135
- writeFile = (fileName, data)=>new Promise((resolve, reject)=>{
158
+ writeDataset = (datasetId, dataSource, datasetSchema)=>new Promise((resolve, reject)=>{
136
159
  if (!this.db) return reject('DB is not open');
137
160
  const transaction = this.db.transaction([
138
- this.storeName
161
+ this.datasetStoreName
139
162
  ], 'readwrite');
140
- const store = transaction.objectStore(this.storeName);
163
+ const store = transaction.objectStore(this.datasetStoreName);
141
164
  const request = store.put({
142
- name: fileName,
143
- data
165
+ datasetId,
166
+ dataSource,
167
+ datasetSchema
144
168
  });
145
169
  request.onsuccess = ()=>{
146
170
  resolve();
@@ -149,92 +173,226 @@ class IndexedDB {
149
173
  reject(event.target.error);
150
174
  };
151
175
  });
152
- readFile = (fileName)=>new Promise((resolve, reject)=>{
176
+ readDataset = (datasetId)=>new Promise((resolve, reject)=>{
153
177
  if (!this.db) return reject('DB is not open');
154
178
  const transaction = this.db.transaction([
155
- this.storeName
179
+ this.datasetStoreName
156
180
  ], 'readonly');
157
- const store = transaction.objectStore(this.storeName);
158
- const request = store.get(fileName);
181
+ const store = transaction.objectStore(this.datasetStoreName);
182
+ const request = store.get(datasetId);
159
183
  request.onsuccess = (event)=>{
160
184
  const result = event.target.result;
161
- result ? resolve(result.data) : resolve(null);
185
+ resolve(result || null);
162
186
  };
163
187
  request.onerror = (event)=>{
164
188
  reject(event.target.error);
165
189
  };
166
190
  });
167
- listFiles = ()=>new Promise((resolve, reject)=>{
191
+ deleteDataset = (datasetId)=>new Promise((resolve, reject)=>{
168
192
  if (!this.db) return reject('DB is not open');
169
193
  const transaction = this.db.transaction([
170
- this.storeName
194
+ this.datasetStoreName
195
+ ], 'readwrite');
196
+ const store = transaction.objectStore(this.datasetStoreName);
197
+ const request = store.delete(datasetId);
198
+ request.onsuccess = ()=>{
199
+ resolve();
200
+ };
201
+ request.onerror = (event)=>{
202
+ reject(event.target.error);
203
+ };
204
+ });
205
+ listDatasets = ()=>new Promise((resolve, reject)=>{
206
+ if (!this.db) return reject('DB is not open');
207
+ const transaction = this.db.transaction([
208
+ this.datasetStoreName
171
209
  ], 'readonly');
172
- const store = transaction.objectStore(this.storeName);
173
- const request = store.getAllKeys();
210
+ const store = transaction.objectStore(this.datasetStoreName);
211
+ const request = store.getAll();
174
212
  request.onsuccess = (event)=>{
175
- const keys = event.target.result;
176
- resolve(keys);
213
+ const result = event.target.result;
214
+ resolve(result);
177
215
  };
178
216
  request.onerror = (event)=>{
179
217
  reject(event.target.error);
180
218
  };
181
219
  });
182
220
  }
221
+ const isUrl = (url)=>isHttpUrl(url) || isBase64Url(url);
222
+ const isHttpUrl = (url)=>url.startsWith('http://') || url.startsWith('https://');
223
+ const isBase64Url = (url)=>url.startsWith('data:');
224
+ class DataSourceBuilder {
225
+ type;
226
+ value;
227
+ constructor(type, value){
228
+ this.type = type;
229
+ this.value = value;
230
+ }
231
+ static from(type, value) {
232
+ return new DataSourceBuilder(type, value);
233
+ }
234
+ async build() {
235
+ const blob = await DataSourceBuilder.convertToBlob(this.type, this.value);
236
+ return {
237
+ type: this.type,
238
+ blob: blob
239
+ };
240
+ }
241
+ static async convertToBlob(type, value) {
242
+ if (value instanceof Blob) return value;
243
+ const convertCsvToBlob = (csvSource)=>{
244
+ if (csvSource instanceof ArrayBuffer) return new Blob([
245
+ csvSource
246
+ ], {
247
+ type: 'text/csv'
248
+ });
249
+ if ('string' == typeof csvSource && isUrl(csvSource)) return DataSourceBuilder.fetchBlob(csvSource);
250
+ return new Blob([
251
+ JSON.stringify(csvSource)
252
+ ], {
253
+ type: 'text/csv'
254
+ });
255
+ };
256
+ const convertJsonToBlob = (jsonSource)=>{
257
+ if (jsonSource instanceof ArrayBuffer) return new Blob([
258
+ jsonSource
259
+ ], {
260
+ type: 'application/json'
261
+ });
262
+ if ('string' == typeof jsonSource && isUrl(jsonSource)) return DataSourceBuilder.fetchBlob(jsonSource);
263
+ return new Blob([
264
+ JSON.stringify(jsonSource)
265
+ ], {
266
+ type: 'application/json'
267
+ });
268
+ };
269
+ const convertParquetToBlob = (parquetSource)=>{
270
+ if (parquetSource instanceof ArrayBuffer) return new Blob([
271
+ parquetSource
272
+ ], {
273
+ type: 'application/parquet'
274
+ });
275
+ if ('string' == typeof parquetSource && isUrl(parquetSource)) return DataSourceBuilder.fetchBlob(parquetSource);
276
+ return new Blob([
277
+ parquetSource
278
+ ], {
279
+ type: 'application/parquet'
280
+ });
281
+ };
282
+ const convertXlsxToBlob = (xlsxSource)=>{
283
+ if (xlsxSource instanceof ArrayBuffer) return new Blob([
284
+ xlsxSource
285
+ ], {
286
+ type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
287
+ });
288
+ if ('string' == typeof xlsxSource && isUrl(xlsxSource)) return DataSourceBuilder.fetchBlob(xlsxSource);
289
+ return new Blob([
290
+ xlsxSource
291
+ ], {
292
+ type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
293
+ });
294
+ };
295
+ switch(type){
296
+ case 'csv':
297
+ return convertCsvToBlob(value);
298
+ case 'json':
299
+ return convertJsonToBlob(value);
300
+ case 'xlsx':
301
+ return convertXlsxToBlob(value);
302
+ case 'parquet':
303
+ return convertParquetToBlob(value);
304
+ default:
305
+ return new Blob([
306
+ value
307
+ ]);
308
+ }
309
+ }
310
+ static async fetchBlob(url) {
311
+ const response = await fetch(url);
312
+ return await response.blob();
313
+ }
314
+ }
315
+ function mapDataTypeToDuckDB(type) {
316
+ switch(type){
317
+ case 'number':
318
+ return 'DOUBLE';
319
+ case 'string':
320
+ return 'VARCHAR';
321
+ case 'date':
322
+ return 'DATE';
323
+ case 'datetime':
324
+ return 'TIMESTAMP';
325
+ case 'timestamp':
326
+ return 'TIMESTAMP';
327
+ default:
328
+ return 'VARCHAR';
329
+ }
330
+ }
183
331
  class VQuery {
184
332
  duckDB;
185
333
  indexedDB;
334
+ isInitialized = false;
186
335
  constructor(dbName = 'vquery'){
187
336
  this.duckDB = new DuckDB();
188
337
  this.indexedDB = new IndexedDB(dbName);
189
338
  }
190
- init = async ()=>{
191
- await this.duckDB.init();
192
- await this.indexedDB.open();
193
- };
194
- close = async ()=>{
195
- await this.duckDB.close();
196
- this.indexedDB.close();
197
- };
198
- writeFile = async (fileName, source)=>{
199
- let blob;
200
- if ('string' == typeof source) {
201
- const response = await fetch(source);
202
- blob = await response.blob();
203
- } else if (source instanceof ArrayBuffer) blob = new Blob([
204
- source
205
- ]);
206
- else if (source instanceof Uint8Array) blob = new Blob([
207
- source.slice()
208
- ]);
209
- else if (source instanceof Blob) blob = source;
210
- else throw new Error('Unsupported source type');
211
- await this.indexedDB.writeFile(fileName, blob);
212
- await this.duckDB.writeFile(fileName, blob);
213
- };
214
- readFile = async (fileName)=>{
215
- const blob = await this.indexedDB.readFile(fileName);
216
- if (blob) await this.duckDB.writeFile(fileName, blob);
217
- else throw new Error(`File ${fileName} not found in IndexedDB`);
218
- };
219
- listFiles = ()=>this.indexedDB.listFiles();
220
- query = async (sql)=>{
221
- const start = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
222
- const result = await this.duckDB.query(sql);
223
- const end = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
224
- return {
225
- ...result,
226
- performance: {
227
- startAt: start,
228
- endAt: end,
229
- duration: Number(end) - Number(start)
230
- }
339
+ async ensureInitialized() {
340
+ if (!this.isInitialized) {
341
+ await this.duckDB.init();
342
+ await this.indexedDB.open();
343
+ this.isInitialized = true;
344
+ }
345
+ }
346
+ async createDataset(datasetId, data, type, datasetSchema) {
347
+ await this.ensureInitialized();
348
+ const dataSource = await DataSourceBuilder.from(type, data).build();
349
+ await this.indexedDB.writeDataset(datasetId, dataSource, datasetSchema);
350
+ }
351
+ async updateDataset(datasetId, data, type, datasetSchema) {
352
+ await this.ensureInitialized();
353
+ const dataSource = await DataSourceBuilder.from(type, data).build();
354
+ await this.indexedDB.writeDataset(datasetId, dataSource, datasetSchema);
355
+ }
356
+ async deleteDataset(datasetId) {
357
+ await this.ensureInitialized();
358
+ await this.indexedDB.deleteDataset(datasetId);
359
+ }
360
+ async listDatasets() {
361
+ await this.ensureInitialized();
362
+ return this.indexedDB.listDatasets();
363
+ }
364
+ async connectDataset(datasetId) {
365
+ await this.ensureInitialized();
366
+ const datasetInfo = await this.indexedDB.readDataset(datasetId);
367
+ if (!datasetInfo) throw new Error(`Dataset ${datasetId} not found`);
368
+ const { dataSource, datasetSchema } = datasetInfo;
369
+ const readFunctionMap = {
370
+ csv: 'read_csv_auto',
371
+ json: 'read_json_auto',
372
+ xlsx: 'read_excel',
373
+ parquet: 'read_parquet'
231
374
  };
232
- };
233
- getSchema = async (fileName)=>this.duckDB.getSchema(fileName);
375
+ const readFunction = readFunctionMap[dataSource.type];
376
+ if (!readFunction) throw new Error(`Unsupported dataSource type: ${dataSource.type}`);
377
+ await this.duckDB.writeFile(datasetId, dataSource.blob);
378
+ const columnsStruct = `{${datasetSchema.columns.map((c)=>`'${c.name}': '${mapDataTypeToDuckDB(c.type)}'`).join(', ')}}`;
379
+ const columnNames = datasetSchema.columns.map((c)=>`"${c.name}"`).join(', ');
380
+ const createViewSql = `CREATE OR REPLACE VIEW "${datasetId}" AS SELECT ${columnNames} FROM ${readFunction}('${datasetId}', columns=${columnsStruct})`;
381
+ await this.duckDB.query(createViewSql);
382
+ return new Dataset(this.duckDB, datasetId, datasetSchema.datasetAlias || datasetId);
383
+ }
234
384
  }
385
+ exports.DataSourceBuilder = __webpack_exports__.DataSourceBuilder;
235
386
  exports.VQuery = __webpack_exports__.VQuery;
387
+ exports.isBase64Url = __webpack_exports__.isBase64Url;
388
+ exports.isHttpUrl = __webpack_exports__.isHttpUrl;
389
+ exports.isUrl = __webpack_exports__.isUrl;
236
390
  for(var __webpack_i__ in __webpack_exports__)if (-1 === [
237
- "VQuery"
391
+ "DataSourceBuilder",
392
+ "VQuery",
393
+ "isBase64Url",
394
+ "isHttpUrl",
395
+ "isUrl"
238
396
  ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
239
397
  Object.defineProperty(exports, '__esModule', {
240
398
  value: true
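In the compiled bundle above, connectDataset registers the stored blob with DuckDB under the dataset id, maps each column type through mapDataTypeToDuckDB, and creates a view over the matching read_* function. For a hypothetical two-column CSV dataset 'sales-2024' the generated statement would be:

// Built from the createViewSql template for an illustrative schema { region: string, revenue: number }:
const createViewSql =
  `CREATE OR REPLACE VIEW "sales-2024" AS SELECT "region", "revenue" ` +
  `FROM read_csv_auto('sales-2024', columns={'region': 'VARCHAR', 'revenue': 'DOUBLE'})`;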
package/dist/index.d.ts CHANGED
@@ -1 +1,3 @@
1
1
  export { VQuery } from './vquery';
2
+ export { DataSourceBuilder } from './dataSourceBuilder/dataSourceBuilder';
3
+ export * from './utils';
package/dist/index.js CHANGED
@@ -1,4 +1,34 @@
1
1
  import { AsyncDuckDB, ConsoleLogger, selectBundle } from "@duckdb/duckdb-wasm";
2
+ class Dataset {
3
+ duckDB;
4
+ _datasetId;
5
+ _tableName;
6
+ constructor(duckDB, datasetId, tableName){
7
+ this.duckDB = duckDB;
8
+ this._datasetId = datasetId;
9
+ this._tableName = tableName;
10
+ }
11
+ async queryBySQL(sql) {
12
+ const start = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
13
+ const result = await this.duckDB.query(sql);
14
+ const end = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
15
+ return {
16
+ ...result,
17
+ performance: {
18
+ startAt: start,
19
+ endAt: end,
20
+ duration: Number(end) - Number(start)
21
+ }
22
+ };
23
+ }
24
+ async disConnect() {}
25
+ get datasetId() {
26
+ return this._datasetId;
27
+ }
28
+ get tableName() {
29
+ return this._tableName;
30
+ }
31
+ }
2
32
  class DuckDB {
3
33
  db = null;
4
34
  connection = null;
@@ -40,16 +70,10 @@ class DuckDB {
40
70
  writeFile = async (fileName, source)=>{
41
71
  if (!this.db) throw new Error('db is null');
42
72
  let uint8Array;
43
- if ('string' == typeof source) {
44
- const response = await fetch(source);
45
- const buffer = await response.arrayBuffer();
46
- uint8Array = new Uint8Array(buffer);
47
- } else if (source instanceof Blob) {
73
+ if (source instanceof Blob) {
48
74
  const buffer = await source.arrayBuffer();
49
75
  uint8Array = new Uint8Array(buffer);
50
- } else if (source instanceof ArrayBuffer) uint8Array = new Uint8Array(source);
51
- else if (source instanceof Uint8Array) uint8Array = source;
52
- else throw new Error('Unsupported source type');
76
+ } else throw new Error('Unsupported source type');
53
77
  await this.db.registerFileBuffer(fileName, uint8Array);
54
78
  };
55
79
  query = async (sql)=>{
@@ -61,13 +85,8 @@ class DuckDB {
61
85
  table
62
86
  };
63
87
  };
64
- ensureSchema = async (fileName)=>{
65
- if (!this.connection) throw new Error('connection is null');
66
- await this.connection.query(`CREATE TEMP TABLE IF NOT EXISTS "${fileName}" AS SELECT * FROM read_csv_auto('${fileName}')`);
67
- };
68
88
  getSchema = async (fileName)=>{
69
89
  if (!this.connection) throw new Error('connection is null');
70
- await this.ensureSchema(fileName);
71
90
  const result = await this.connection.query(`PRAGMA table_info('${fileName}')`);
72
91
  return result.toArray().map((row)=>row.toJSON());
73
92
  };
@@ -75,16 +94,16 @@ class DuckDB {
75
94
  class IndexedDB {
76
95
  db = null;
77
96
  dbName;
78
- storeName = 'vqueryFiles';
97
+ datasetStoreName = 'vqueryDatasets';
79
98
  constructor(dbName){
80
99
  this.dbName = dbName;
81
100
  }
82
101
  open = ()=>new Promise((resolve, reject)=>{
83
- const request = indexedDB.open(this.dbName, 1);
102
+ const request = indexedDB.open(this.dbName, 2);
84
103
  request.onupgradeneeded = (event)=>{
85
104
  const db = event.target.result;
86
- if (!db.objectStoreNames.contains(this.storeName)) db.createObjectStore(this.storeName, {
87
- keyPath: 'name'
105
+ if (!db.objectStoreNames.contains(this.datasetStoreName)) db.createObjectStore(this.datasetStoreName, {
106
+ keyPath: 'datasetId'
88
107
  });
89
108
  };
90
109
  request.onsuccess = (event)=>{
@@ -101,15 +120,16 @@ class IndexedDB {
101
120
  this.db = null;
102
121
  }
103
122
  };
104
- writeFile = (fileName, data)=>new Promise((resolve, reject)=>{
123
+ writeDataset = (datasetId, dataSource, datasetSchema)=>new Promise((resolve, reject)=>{
105
124
  if (!this.db) return reject('DB is not open');
106
125
  const transaction = this.db.transaction([
107
- this.storeName
126
+ this.datasetStoreName
108
127
  ], 'readwrite');
109
- const store = transaction.objectStore(this.storeName);
128
+ const store = transaction.objectStore(this.datasetStoreName);
110
129
  const request = store.put({
111
- name: fileName,
112
- data
130
+ datasetId,
131
+ dataSource,
132
+ datasetSchema
113
133
  });
114
134
  request.onsuccess = ()=>{
115
135
  resolve();
@@ -118,87 +138,213 @@ class IndexedDB {
118
138
  reject(event.target.error);
119
139
  };
120
140
  });
121
- readFile = (fileName)=>new Promise((resolve, reject)=>{
141
+ readDataset = (datasetId)=>new Promise((resolve, reject)=>{
122
142
  if (!this.db) return reject('DB is not open');
123
143
  const transaction = this.db.transaction([
124
- this.storeName
144
+ this.datasetStoreName
125
145
  ], 'readonly');
126
- const store = transaction.objectStore(this.storeName);
127
- const request = store.get(fileName);
146
+ const store = transaction.objectStore(this.datasetStoreName);
147
+ const request = store.get(datasetId);
128
148
  request.onsuccess = (event)=>{
129
149
  const result = event.target.result;
130
- result ? resolve(result.data) : resolve(null);
150
+ resolve(result || null);
131
151
  };
132
152
  request.onerror = (event)=>{
133
153
  reject(event.target.error);
134
154
  };
135
155
  });
136
- listFiles = ()=>new Promise((resolve, reject)=>{
156
+ deleteDataset = (datasetId)=>new Promise((resolve, reject)=>{
137
157
  if (!this.db) return reject('DB is not open');
138
158
  const transaction = this.db.transaction([
139
- this.storeName
159
+ this.datasetStoreName
160
+ ], 'readwrite');
161
+ const store = transaction.objectStore(this.datasetStoreName);
162
+ const request = store.delete(datasetId);
163
+ request.onsuccess = ()=>{
164
+ resolve();
165
+ };
166
+ request.onerror = (event)=>{
167
+ reject(event.target.error);
168
+ };
169
+ });
170
+ listDatasets = ()=>new Promise((resolve, reject)=>{
171
+ if (!this.db) return reject('DB is not open');
172
+ const transaction = this.db.transaction([
173
+ this.datasetStoreName
140
174
  ], 'readonly');
141
- const store = transaction.objectStore(this.storeName);
142
- const request = store.getAllKeys();
175
+ const store = transaction.objectStore(this.datasetStoreName);
176
+ const request = store.getAll();
143
177
  request.onsuccess = (event)=>{
144
- const keys = event.target.result;
145
- resolve(keys);
178
+ const result = event.target.result;
179
+ resolve(result);
146
180
  };
147
181
  request.onerror = (event)=>{
148
182
  reject(event.target.error);
149
183
  };
150
184
  });
151
185
  }
186
+ const isUrl = (url)=>isHttpUrl(url) || isBase64Url(url);
187
+ const isHttpUrl = (url)=>url.startsWith('http://') || url.startsWith('https://');
188
+ const isBase64Url = (url)=>url.startsWith('data:');
189
+ class DataSourceBuilder {
190
+ type;
191
+ value;
192
+ constructor(type, value){
193
+ this.type = type;
194
+ this.value = value;
195
+ }
196
+ static from(type, value) {
197
+ return new DataSourceBuilder(type, value);
198
+ }
199
+ async build() {
200
+ const blob = await DataSourceBuilder.convertToBlob(this.type, this.value);
201
+ return {
202
+ type: this.type,
203
+ blob: blob
204
+ };
205
+ }
206
+ static async convertToBlob(type, value) {
207
+ if (value instanceof Blob) return value;
208
+ const convertCsvToBlob = (csvSource)=>{
209
+ if (csvSource instanceof ArrayBuffer) return new Blob([
210
+ csvSource
211
+ ], {
212
+ type: 'text/csv'
213
+ });
214
+ if ('string' == typeof csvSource && isUrl(csvSource)) return DataSourceBuilder.fetchBlob(csvSource);
215
+ return new Blob([
216
+ JSON.stringify(csvSource)
217
+ ], {
218
+ type: 'text/csv'
219
+ });
220
+ };
221
+ const convertJsonToBlob = (jsonSource)=>{
222
+ if (jsonSource instanceof ArrayBuffer) return new Blob([
223
+ jsonSource
224
+ ], {
225
+ type: 'application/json'
226
+ });
227
+ if ('string' == typeof jsonSource && isUrl(jsonSource)) return DataSourceBuilder.fetchBlob(jsonSource);
228
+ return new Blob([
229
+ JSON.stringify(jsonSource)
230
+ ], {
231
+ type: 'application/json'
232
+ });
233
+ };
234
+ const convertParquetToBlob = (parquetSource)=>{
235
+ if (parquetSource instanceof ArrayBuffer) return new Blob([
236
+ parquetSource
237
+ ], {
238
+ type: 'application/parquet'
239
+ });
240
+ if ('string' == typeof parquetSource && isUrl(parquetSource)) return DataSourceBuilder.fetchBlob(parquetSource);
241
+ return new Blob([
242
+ parquetSource
243
+ ], {
244
+ type: 'application/parquet'
245
+ });
246
+ };
247
+ const convertXlsxToBlob = (xlsxSource)=>{
248
+ if (xlsxSource instanceof ArrayBuffer) return new Blob([
249
+ xlsxSource
250
+ ], {
251
+ type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
252
+ });
253
+ if ('string' == typeof xlsxSource && isUrl(xlsxSource)) return DataSourceBuilder.fetchBlob(xlsxSource);
254
+ return new Blob([
255
+ xlsxSource
256
+ ], {
257
+ type: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
258
+ });
259
+ };
260
+ switch(type){
261
+ case 'csv':
262
+ return convertCsvToBlob(value);
263
+ case 'json':
264
+ return convertJsonToBlob(value);
265
+ case 'xlsx':
266
+ return convertXlsxToBlob(value);
267
+ case 'parquet':
268
+ return convertParquetToBlob(value);
269
+ default:
270
+ return new Blob([
271
+ value
272
+ ]);
273
+ }
274
+ }
275
+ static async fetchBlob(url) {
276
+ const response = await fetch(url);
277
+ return await response.blob();
278
+ }
279
+ }
280
+ function mapDataTypeToDuckDB(type) {
281
+ switch(type){
282
+ case 'number':
283
+ return 'DOUBLE';
284
+ case 'string':
285
+ return 'VARCHAR';
286
+ case 'date':
287
+ return 'DATE';
288
+ case 'datetime':
289
+ return 'TIMESTAMP';
290
+ case 'timestamp':
291
+ return 'TIMESTAMP';
292
+ default:
293
+ return 'VARCHAR';
294
+ }
295
+ }
152
296
  class VQuery {
153
297
  duckDB;
154
298
  indexedDB;
299
+ isInitialized = false;
155
300
  constructor(dbName = 'vquery'){
156
301
  this.duckDB = new DuckDB();
157
302
  this.indexedDB = new IndexedDB(dbName);
158
303
  }
159
- init = async ()=>{
160
- await this.duckDB.init();
161
- await this.indexedDB.open();
162
- };
163
- close = async ()=>{
164
- await this.duckDB.close();
165
- this.indexedDB.close();
166
- };
167
- writeFile = async (fileName, source)=>{
168
- let blob;
169
- if ('string' == typeof source) {
170
- const response = await fetch(source);
171
- blob = await response.blob();
172
- } else if (source instanceof ArrayBuffer) blob = new Blob([
173
- source
174
- ]);
175
- else if (source instanceof Uint8Array) blob = new Blob([
176
- source.slice()
177
- ]);
178
- else if (source instanceof Blob) blob = source;
179
- else throw new Error('Unsupported source type');
180
- await this.indexedDB.writeFile(fileName, blob);
181
- await this.duckDB.writeFile(fileName, blob);
182
- };
183
- readFile = async (fileName)=>{
184
- const blob = await this.indexedDB.readFile(fileName);
185
- if (blob) await this.duckDB.writeFile(fileName, blob);
186
- else throw new Error(`File ${fileName} not found in IndexedDB`);
187
- };
188
- listFiles = ()=>this.indexedDB.listFiles();
189
- query = async (sql)=>{
190
- const start = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
191
- const result = await this.duckDB.query(sql);
192
- const end = performance?.now?.()?.toFixed(3) ?? Date.now().toFixed(3);
193
- return {
194
- ...result,
195
- performance: {
196
- startAt: start,
197
- endAt: end,
198
- duration: Number(end) - Number(start)
199
- }
304
+ async ensureInitialized() {
305
+ if (!this.isInitialized) {
306
+ await this.duckDB.init();
307
+ await this.indexedDB.open();
308
+ this.isInitialized = true;
309
+ }
310
+ }
311
+ async createDataset(datasetId, data, type, datasetSchema) {
312
+ await this.ensureInitialized();
313
+ const dataSource = await DataSourceBuilder.from(type, data).build();
314
+ await this.indexedDB.writeDataset(datasetId, dataSource, datasetSchema);
315
+ }
316
+ async updateDataset(datasetId, data, type, datasetSchema) {
317
+ await this.ensureInitialized();
318
+ const dataSource = await DataSourceBuilder.from(type, data).build();
319
+ await this.indexedDB.writeDataset(datasetId, dataSource, datasetSchema);
320
+ }
321
+ async deleteDataset(datasetId) {
322
+ await this.ensureInitialized();
323
+ await this.indexedDB.deleteDataset(datasetId);
324
+ }
325
+ async listDatasets() {
326
+ await this.ensureInitialized();
327
+ return this.indexedDB.listDatasets();
328
+ }
329
+ async connectDataset(datasetId) {
330
+ await this.ensureInitialized();
331
+ const datasetInfo = await this.indexedDB.readDataset(datasetId);
332
+ if (!datasetInfo) throw new Error(`Dataset ${datasetId} not found`);
333
+ const { dataSource, datasetSchema } = datasetInfo;
334
+ const readFunctionMap = {
335
+ csv: 'read_csv_auto',
336
+ json: 'read_json_auto',
337
+ xlsx: 'read_excel',
338
+ parquet: 'read_parquet'
200
339
  };
201
- };
202
- getSchema = async (fileName)=>this.duckDB.getSchema(fileName);
340
+ const readFunction = readFunctionMap[dataSource.type];
341
+ if (!readFunction) throw new Error(`Unsupported dataSource type: ${dataSource.type}`);
342
+ await this.duckDB.writeFile(datasetId, dataSource.blob);
343
+ const columnsStruct = `{${datasetSchema.columns.map((c)=>`'${c.name}': '${mapDataTypeToDuckDB(c.type)}'`).join(', ')}}`;
344
+ const columnNames = datasetSchema.columns.map((c)=>`"${c.name}"`).join(', ');
345
+ const createViewSql = `CREATE OR REPLACE VIEW "${datasetId}" AS SELECT ${columnNames} FROM ${readFunction}('${datasetId}', columns=${columnsStruct})`;
346
+ await this.duckDB.query(createViewSql);
347
+ return new Dataset(this.duckDB, datasetId, datasetSchema.datasetAlias || datasetId);
348
+ }
203
349
  }
204
- export { VQuery };
350
+ export { DataSourceBuilder, VQuery, isBase64Url, isHttpUrl, isUrl };
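The ESM build now exposes the same additional named exports as the CJS bundle. A quick sketch of the new URL helpers; the values are illustrative:

import { VQuery, DataSourceBuilder, isUrl, isHttpUrl, isBase64Url } from '@visactor/vquery';

isHttpUrl('https://example.com/data.csv');         // true
isBase64Url('data:application/json;base64,e30=');  // true
isUrl('region,revenue\nEMEA,1200');                // false — treated as inline data, not fetched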
@@ -0,0 +1,11 @@
1
+ export type { QueryResult } from './QueryResult';
2
+ export type DataType = 'number' | 'string' | 'date' | 'datetime' | 'timestamp';
3
+ export interface DatasetColumn {
4
+ type: DataType;
5
+ name: string;
6
+ }
7
+ export interface DatasetSchema {
8
+ datasetId: string;
9
+ datasetAlias: string;
10
+ columns: DatasetColumn[];
11
+ }
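A DatasetSchema literal matching these declarations; the id, alias, and columns are illustrative:

const schema: DatasetSchema = {
  datasetId: 'sales-2024',
  datasetAlias: 'sales',
  columns: [
    { name: 'region', type: 'string' },
    { name: 'revenue', type: 'number' },
    { name: 'closed_at', type: 'date' }
  ]
};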
@@ -0,0 +1,7 @@
1
+ export type TidyDatum = Record<string, number | string | null | boolean | undefined>;
2
+ export type DataSourceType = 'csv' | 'json' | 'xlsx' | 'parquet';
3
+ export type DataSourceValue = string | ArrayBuffer | Blob | TidyDatum[];
4
+ export interface DataSource {
5
+ type: DataSourceType;
6
+ blob: Blob;
7
+ }
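And the corresponding data source value accepted by the builder, here as tidy rows (again illustrative):

const rows: TidyDatum[] = [
  { region: 'EMEA', revenue: 1200, closed_at: '2024-03-01' },
  { region: 'APAC', revenue: 980,  closed_at: '2024-03-02' }
];
// A DataSourceValue may also be a raw string/URL, an ArrayBuffer, or a Blob.
const value: DataSourceValue = rows;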
@@ -1 +1,3 @@
1
- export type { QueryResult } from './QueryResult';
1
+ export * from './DataSet';
2
+ export * from './DataSource';
3
+ export * from './QueryResult';
@@ -0,0 +1 @@
1
+ export { isUrl, isHttpUrl, isBase64Url } from './url';
@@ -0,0 +1,3 @@
1
+ export declare const isUrl: (url: string) => boolean;
2
+ export declare const isHttpUrl: (url: string) => boolean;
3
+ export declare const isBase64Url: (url: string) => boolean;
package/dist/vquery.d.ts CHANGED
@@ -1,48 +1,33 @@
1
- import { QueryResult } from './types';
1
+ import { Dataset } from './dataset';
2
+ import { DatasetSchema, TidyDatum, DataSourceType } from './types';
2
3
  export declare class VQuery {
3
4
  private duckDB;
4
5
  private indexedDB;
6
+ private isInitialized;
5
7
  constructor(dbName?: string);
8
+ private ensureInitialized;
6
9
  /**
7
- * @description Initialize the database
10
+ * Create a dataset: store its schema and data in IndexedDB
8
11
  */
9
- init: () => Promise<void>;
12
+ createDataset(datasetId: string, data: string | ArrayBuffer | Blob | TidyDatum[], type: DataSourceType, datasetSchema: DatasetSchema): Promise<void>;
10
13
  /**
11
- * @description Close the database
14
+ * Update a dataset: write the updated information to IndexedDB
12
15
  */
13
- close: () => Promise<void>;
16
+ updateDataset(datasetId: string, data: string | ArrayBuffer | Blob | TidyDatum[], type: DataSourceType, datasetSchema: DatasetSchema): Promise<void>;
14
17
  /**
15
- * @description Register a file
16
- * @param fileName file name
17
- * @param source file contents
18
+ * Delete a dataset: remove it from IndexedDB
18
19
  */
19
- writeFile: (fileName: string, source: string | ArrayBuffer | Uint8Array | Blob) => Promise<void>;
20
+ deleteDataset(datasetId: string): Promise<void>;
20
21
  /**
21
- * @description Read a file from IndexedDB and register it with DuckDB
22
- * @param fileName file name
22
+ * List all available datasets
23
23
  */
24
- readFile: (fileName: string) => Promise<void>;
24
+ listDatasets(): Promise<{
25
+ datasetId: string;
26
+ dataSource: import("./types").DataSource;
27
+ datasetSchema: DatasetSchema;
28
+ }[]>;
25
29
  /**
26
- * @description List all files stored in IndexedDB
30
+ * Connect to a dataset: read its schema from IndexedDB, create the table in memory with DuckDB, and return the dataset handle
27
31
  */
28
- listFiles: () => Promise<string[]>;
29
- /**
30
- * @description 执行 SQL 查询
31
- * @param sql SQL 语句
32
- */
33
- query: (sql: string) => Promise<{
34
- performance: {
35
- startAt: string;
36
- endAt: string;
37
- duration: number;
38
- };
39
- dataset: any[];
40
- table: any;
41
- }>;
42
- /**
43
- * @description 获取文件的 Schema
44
- * @param fileName 文件名
45
- * @returns 文件的 Schema
46
- */
47
- getSchema: (fileName: string) => Promise<QueryResult>;
32
+ connectDataset(datasetId: string): Promise<Dataset>;
48
33
  }
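Taken together, the old init/writeFile/readFile/query/getSchema surface is replaced by a dataset-oriented API with lazy initialization. An end-to-end sketch using the illustrative schema and rows from the type examples above:

import { VQuery } from '@visactor/vquery';

const vq = new VQuery();                                    // no explicit init(); ensureInitialized runs lazily
await vq.createDataset('sales-2024', rows, 'json', schema); // persists { type, blob } plus the schema in IndexedDB
const ds = await vq.connectDataset('sales-2024');           // registers the blob and creates the DuckDB view
const { dataset } = await ds.queryBySQL(
  'SELECT region, SUM(revenue) AS total FROM "sales-2024" GROUP BY region'
);
console.log(await vq.listDatasets());                       // [{ datasetId, dataSource, datasetSchema }]
await vq.deleteDataset('sales-2024');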
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@visactor/vquery",
3
- "version": "0.1.45",
3
+ "version": "0.1.46",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {