squirreling 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,10 +7,10 @@
7
7
  [![minzipped](https://img.shields.io/bundlephobia/minzip/squirreling)](https://www.npmjs.com/package/squirreling)
8
8
  [![workflow status](https://github.com/hyparam/squirreling/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/squirreling/actions)
9
9
  [![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
10
- ![coverage](https://img.shields.io/badge/Coverage-90-darkred)
10
+ ![coverage](https://img.shields.io/badge/Coverage-93-darkred)
11
11
  [![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/squirreling?activeTab=dependencies)
12
12
 
13
- Squirreling is a streaming async SQL engine for JavaScript. It is designed to provide efficient streaming of results from pluggable backend for highly efficient retrieval of data for browser applications.
13
+ Squirreling is a streaming async SQL engine for JavaScript. It is designed to provide efficient streaming of results from pluggable backends for highly efficient retrieval of data for browser applications.
14
14
 
15
15
  ## Features
16
16
 
@@ -22,6 +22,8 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
22
22
  - Constant memory usage for simple queries with LIMIT
23
23
  - Robust error handling and validation designed for LLM tool use
24
24
  - In-memory data option for simple use cases
25
+ - Select only
26
+ - No joins (yet)
25
27
 
26
28
  ## Usage
27
29
 
@@ -39,18 +41,19 @@ const users = [
39
41
  ]
40
42
 
41
43
  // Process rows as they arrive (streaming)
42
- for await (const user of executeSql({
44
+ for await (const { cnt } of executeSql({
43
45
  tables: { users },
44
- query: 'SELECT * FROM users WHERE active = TRUE LIMIT 100',
46
+ query: 'SELECT count(*) as cnt FROM users WHERE active = TRUE LIMIT 10',
45
47
  })) {
46
- console.log(user.name)
48
+ console.log('Count', cnt)
47
49
  }
48
50
  ```
49
51
 
50
52
  There is an exported helper function `collect` to gather all rows into an array if needed:
51
53
 
52
54
  ```javascript
53
- import { collect } from 'squirreling'
55
+ import { collect, executeSql } from 'squirreling'
56
+
54
57
  const allUsers = await collect(executeSql({
55
58
  tables: { users },
56
59
  query: 'SELECT * FROM users',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Squirreling SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -0,0 +1,86 @@
1
+ /**
2
+ * @import { AsyncDataSource, AsyncRow, SqlPrimitive } from '../types.js'
3
+ */
4
+
5
+
6
+ /**
7
+ * Wraps an async generator of plain objects into an AsyncDataSource
8
+ *
9
+ * @param {AsyncGenerator<AsyncRow>} gen
10
+ * @returns {AsyncDataSource}
11
+ */
12
+ export function generatorSource(gen) {
13
+ return {
14
+ async *getRows() {
15
+ yield* gen
16
+ },
17
+ }
18
+ }
19
+
20
+ /**
21
+ * Creates an async row accessor that wraps a plain JavaScript object
22
+ *
23
+ * @param {Record<string, SqlPrimitive>} obj - the plain object
24
+ * @returns {AsyncRow} a row accessor interface
25
+ */
26
+ function asyncRow(obj) {
27
+ /** @type {AsyncRow} */
28
+ const row = {}
29
+ for (const [key, value] of Object.entries(obj)) {
30
+ row[key] = () => Promise.resolve(value)
31
+ }
32
+ return row
33
+ }
34
+
35
+ /**
36
+ * Creates an async memory-backed data source from an array of plain objects
37
+ *
38
+ * @param {Record<string, SqlPrimitive>[]} data - array of plain objects
39
+ * @returns {AsyncDataSource} an async data source interface
40
+ */
41
+ export function memorySource(data) {
42
+ return {
43
+ async *getRows() {
44
+ for (const item of data) {
45
+ yield asyncRow(item)
46
+ }
47
+ },
48
+ }
49
+ }
50
+
51
+ /**
52
+ * Wraps a data source that caches all accessed rows in memory
53
+ * @param {AsyncDataSource} source
54
+ * @returns {AsyncDataSource}
55
+ */
56
+ export function cachedDataSource(source) {
57
+ /** @type {Map<string, Promise<SqlPrimitive>>} */
58
+ const cache = new Map()
59
+ return {
60
+ /**
61
+ * @returns {AsyncGenerator<AsyncRow>}
62
+ */
63
+ async *getRows() {
64
+ let index = 0
65
+ for await (const row of source.getRows()) {
66
+ const rowIndex = index
67
+ /** @type {AsyncRow} */
68
+ const out = {}
69
+ for (const [key, cell] of Object.entries(row)) {
70
+ // Wrap the cell to cache accesses
71
+ out[key] = () => {
72
+ const cacheKey = `${rowIndex}:${key}`
73
+ let value = cache.get(cacheKey)
74
+ if (!value) {
75
+ value = cell()
76
+ cache.set(cacheKey, value)
77
+ }
78
+ return value
79
+ }
80
+ }
81
+ yield out
82
+ index++
83
+ }
84
+ },
85
+ }
86
+ }
@@ -3,19 +3,21 @@ import { evaluateExpr } from './expression.js'
3
3
  /**
4
4
  * Evaluates an aggregate function over a set of rows
5
5
  *
6
- * @import { AggregateColumn, ExprNode, RowSource } from '../types.js'
7
- * @param {AggregateColumn} col - aggregate column definition
8
- * @param {RowSource[]} rows - rows to aggregate
6
+ * @import { AggregateColumn, AsyncDataSource, ExprNode, AsyncRow } from '../types.js'
7
+ * @param {Object} options
8
+ * @param {AggregateColumn} options.col - aggregate column definition
9
+ * @param {AsyncRow[]} options.rows - rows to aggregate
10
+ * @param {Record<string, AsyncDataSource>} options.tables
9
11
  * @returns {Promise<number | null>} aggregated result
10
12
  */
11
- export async function evaluateAggregate(col, rows) {
13
+ export async function evaluateAggregate({ col, rows, tables }) {
12
14
  const { arg, func } = col
13
15
 
14
16
  if (func === 'COUNT') {
15
17
  if (arg.kind === 'star') return rows.length
16
18
  let count = 0
17
- for (let i = 0; i < rows.length; i += 1) {
18
- const v = await evaluateExpr({ node: arg.expr, row: rows[i] })
19
+ for (const row of rows) {
20
+ const v = await evaluateExpr({ node: arg.expr, row, tables })
19
21
  if (v !== null && v !== undefined) {
20
22
  count += 1
21
23
  }
@@ -34,8 +36,8 @@ export async function evaluateAggregate(col, rows) {
34
36
  /** @type {number | null} */
35
37
  let max = null
36
38
 
37
- for (let i = 0; i < rows.length; i += 1) {
38
- const raw = await evaluateExpr({ node: arg.expr, row: rows[i] })
39
+ for (const row of rows) {
40
+ const raw = await evaluateExpr({ node: arg.expr, row, tables })
39
41
  if (raw == null) continue
40
42
  const num = Number(raw)
41
43
  if (!Number.isFinite(num)) continue
@@ -80,20 +82,17 @@ export function defaultAggregateAlias(col) {
80
82
  export function defaultAggregateAliasExpr(expr) {
81
83
  if (expr.type === 'identifier') {
82
84
  return expr.name
83
- }
84
- if (expr.type === 'literal') {
85
+ } else if (expr.type === 'literal') {
85
86
  return String(expr.value)
86
- }
87
- if (expr.type === 'cast') {
87
+ } else if (expr.type === 'cast') {
88
88
  return defaultAggregateAliasExpr(expr.expr) + '_as_' + expr.toType
89
- }
90
- if (expr.type === 'unary') {
89
+ } else if (expr.type === 'unary') {
91
90
  return expr.op + '_' + defaultAggregateAliasExpr(expr.argument)
92
- }
93
- if (expr.type === 'binary') {
91
+ } else if (expr.type === 'binary') {
94
92
  return defaultAggregateAliasExpr(expr.left) + '_' + expr.op + '_' + defaultAggregateAliasExpr(expr.right)
95
- }
96
- if (expr.type === 'function') {
93
+ } else if (expr.type === 'function') {
97
94
  return expr.name.toLowerCase() + '_' + expr.args.map(defaultAggregateAliasExpr).join('_')
95
+ } else {
96
+ return 'expr'
98
97
  }
99
98
  }
@@ -1,19 +1,18 @@
1
1
  import { evaluateExpr } from './expression.js'
2
2
  import { parseSql } from '../parse/parse.js'
3
- import { createAsyncMemorySource, createRowAccessor } from '../backend/memory.js'
3
+ import { generatorSource, memorySource } from '../backend/dataSource.js'
4
4
  import { defaultAggregateAlias, evaluateAggregate } from './aggregates.js'
5
5
  import { evaluateHavingExpr } from './having.js'
6
- import { collect } from './utils.js'
7
6
 
8
7
  /**
9
- * @import { AsyncDataSource, ExecuteSqlOptions, ExprNode, OrderByItem, RowSource, SelectStatement, SqlPrimitive } from '../types.js'
8
+ * @import { AsyncDataSource, ExecuteSqlOptions, ExprNode, OrderByItem, AsyncRow, SelectStatement, SqlPrimitive } from '../types.js'
10
9
  */
11
10
 
12
11
  /**
13
12
  * Executes a SQL SELECT query against named data sources
14
13
  *
15
14
  * @param {ExecuteSqlOptions} options - the execution options
16
- * @returns {AsyncGenerator<Record<string, any>>} async generator yielding result rows
15
+ * @returns {AsyncGenerator<AsyncRow>} async generator yielding result rows
17
16
  */
18
17
  export async function* executeSql({ tables, query }) {
19
18
  const select = parseSql(query)
@@ -31,7 +30,7 @@ export async function* executeSql({ tables, query }) {
31
30
  const normalizedTables = {}
32
31
  for (const [name, source] of Object.entries(tables)) {
33
32
  if (Array.isArray(source)) {
34
- normalizedTables[name] = createAsyncMemorySource(source)
33
+ normalizedTables[name] = memorySource(source)
35
34
  } else {
36
35
  normalizedTables[name] = source
37
36
  }
@@ -45,7 +44,7 @@ export async function* executeSql({ tables, query }) {
45
44
  *
46
45
  * @param {SelectStatement} select
47
46
  * @param {Record<string, AsyncDataSource>} tables
48
- * @returns {AsyncGenerator<Record<string, any>>} async generator yielding result rows
47
+ * @returns {AsyncGenerator<AsyncRow>}
49
48
  */
50
49
  export async function* executeSelect(select, tables) {
51
50
  /** @type {AsyncDataSource} */
@@ -60,8 +59,7 @@ export async function* executeSelect(select, tables) {
60
59
  dataSource = table
61
60
  } else {
62
61
  // Nested subquery - recursively resolve
63
- const derivedData = await collect(executeSelect(select.from.query, tables))
64
- dataSource = createAsyncMemorySource(derivedData)
62
+ dataSource = generatorSource(executeSelect(select.from.query, tables))
65
63
  }
66
64
 
67
65
  yield* evaluateSelectAst(select, dataSource, tables)
@@ -98,15 +96,15 @@ function defaultDerivedAlias(expr) {
98
96
  /**
99
97
  * Creates a stable string key for a row to enable deduplication
100
98
  *
101
- * @param {Record<string, any>} row
102
- * @returns {string} a stable string representation of the row
99
+ * @param {AsyncRow} row
100
+ * @returns {Promise<string>} a stable string representation of the row
103
101
  */
104
- function stableRowKey(row) {
102
+ async function stableRowKey(row) {
105
103
  const keys = Object.keys(row).sort()
106
104
  /** @type {string[]} */
107
105
  const parts = []
108
106
  for (const k of keys) {
109
- const v = row[k]
107
+ const v = await row[k]()
110
108
  parts.push(k + ':' + JSON.stringify(v))
111
109
  }
112
110
  return parts.join('|')
@@ -140,18 +138,18 @@ function compareValues(a, b) {
140
138
  /**
141
139
  * Applies DISTINCT filtering to remove duplicate rows
142
140
  *
143
- * @param {Record<string, any>[]} rows - The input rows
144
- * @param {boolean} distinct - Whether to apply deduplication
145
- * @returns {Record<string, any>[]} The deduplicated rows
141
+ * @param {AsyncRow[]} rows - the input rows
142
+ * @param {boolean} distinct - whether to apply deduplication
143
+ * @returns {Promise<AsyncRow[]>} the deduplicated rows
146
144
  */
147
- function applyDistinct(rows, distinct) {
145
+ async function applyDistinct(rows, distinct) {
148
146
  if (!distinct) return rows
149
147
  /** @type {Set<string>} */
150
148
  const seen = new Set()
151
- /** @type {Record<string, any>[]} */
149
+ /** @type {AsyncRow[]} */
152
150
  const result = []
153
151
  for (const row of rows) {
154
- const key = stableRowKey(row)
152
+ const key = await stableRowKey(row)
155
153
  if (seen.has(key)) continue
156
154
  seen.add(key)
157
155
  result.push(row)
@@ -162,13 +160,13 @@ function applyDistinct(rows, distinct) {
162
160
  /**
163
161
  * Applies ORDER BY sorting to an AsyncRow array (before projection)
164
162
  *
165
- * @param {RowSource[]} rows - the input row sources
163
+ * @param {AsyncRow[]} rows - the input row sources
166
164
  * @param {OrderByItem[]} orderBy - the sort specifications
167
165
  * @param {Record<string, AsyncDataSource>} tables
168
- * @returns {Promise<RowSource[]>} the sorted row sources
166
+ * @returns {Promise<AsyncRow[]>} the sorted row sources
169
167
  */
170
168
  async function sortRowSources(rows, orderBy, tables) {
171
- if (!orderBy?.length) return rows
169
+ if (!orderBy.length) return rows
172
170
 
173
171
  // Pre-evaluate ORDER BY expressions for all rows
174
172
  /** @type {SqlPrimitive[][]} */
@@ -223,13 +221,13 @@ async function sortRowSources(rows, orderBy, tables) {
223
221
  /**
224
222
  * Applies ORDER BY sorting to rows
225
223
  *
226
- * @param {Record<string, any>[]} rows - the input rows
224
+ * @param {AsyncRow[]} rows - the input rows
227
225
  * @param {OrderByItem[]} orderBy - the sort specifications
228
226
  * @param {Record<string, AsyncDataSource>} tables
229
- * @returns {Promise<Record<string, any>[]>} the sorted rows
227
+ * @returns {Promise<AsyncRow[]>} the sorted rows
230
228
  */
231
229
  async function applyOrderBy(rows, orderBy, tables) {
232
- if (!orderBy?.length) return rows
230
+ if (!orderBy.length) return rows
233
231
 
234
232
  // Pre-evaluate ORDER BY expressions for all rows
235
233
  /** @type {SqlPrimitive[][]} */
@@ -238,7 +236,7 @@ async function applyOrderBy(rows, orderBy, tables) {
238
236
  /** @type {SqlPrimitive[]} */
239
237
  const rowValues = []
240
238
  for (const term of orderBy) {
241
- const value = await evaluateExpr({ node: term.expr, row: createRowAccessor(row), tables })
239
+ const value = await evaluateExpr({ node: term.expr, row, tables })
242
240
  rowValues.push(value)
243
241
  }
244
242
  evaluatedValues.push(rowValues)
@@ -284,22 +282,17 @@ async function applyOrderBy(rows, orderBy, tables) {
284
282
  /**
285
283
  * Evaluates a select with a resolved FROM data source
286
284
  *
287
- * @param {SelectStatement} select - the parsed SQL AST
288
- * @param {AsyncDataSource} dataSource - the async data source
285
+ * @param {SelectStatement} select
286
+ * @param {AsyncDataSource} dataSource
289
287
  * @param {Record<string, AsyncDataSource>} tables
290
- * @returns {AsyncGenerator<Record<string, any>>} async generator yielding result rows
288
+ * @returns {AsyncGenerator<AsyncRow>}
291
289
  */
292
290
  async function* evaluateSelectAst(select, dataSource, tables) {
293
291
  // SQL priority: from, where, group by, having, select, order by, offset, limit
294
292
 
295
293
  const hasAggregate = select.columns.some(col => col.kind === 'aggregate')
296
- const useGrouping = hasAggregate || select.groupBy?.length > 0
297
-
298
- // Determine if we need to buffer (collect all rows first)
299
- const needsBuffering =
300
- select.orderBy.length > 0 ||
301
- select.distinct ||
302
- useGrouping
294
+ const useGrouping = hasAggregate || select.groupBy.length > 0
295
+ const needsBuffering = useGrouping || select.orderBy.length > 0
303
296
 
304
297
  if (needsBuffering) {
305
298
  // BUFFERING PATH: Collect all rows, process, then yield
@@ -311,52 +304,49 @@ async function* evaluateSelectAst(select, dataSource, tables) {
311
304
  }
312
305
 
313
306
  /**
314
- * Streaming evaluation for simple queries (no ORDER BY, DISTINCT, or GROUP BY)
307
+ * Streaming evaluation for simple queries (no ORDER BY or GROUP BY)
308
+ * Supports DISTINCT by tracking seen row keys without buffering full rows
315
309
  *
316
310
  * @param {SelectStatement} select
317
311
  * @param {AsyncDataSource} dataSource
318
312
  * @param {Record<string, AsyncDataSource>} tables
319
- * @returns {AsyncGenerator<Record<string, any>>}
313
+ * @returns {AsyncGenerator<AsyncRow>}
320
314
  */
321
315
  async function* evaluateStreaming(select, dataSource, tables) {
322
316
  let rowsYielded = 0
323
317
  let rowsSkipped = 0
324
318
  const offset = select.offset ?? 0
325
319
  const limit = select.limit ?? Infinity
320
+ if (limit <= 0) return
321
+
322
+ // For DISTINCT, track seen row keys
323
+ /** @type {Set<string> | undefined} */
324
+ const seen = select.distinct ? new Set() : undefined
326
325
 
327
326
  for await (const row of dataSource.getRows()) {
328
327
  // WHERE filter
329
328
  if (select.where) {
330
- const passes = await evaluateExpr({ node: select.where, row, tables })
331
-
332
- if (!passes) {
333
- continue
334
- }
329
+ const pass = await evaluateExpr({ node: select.where, row, tables })
330
+ if (!pass) continue
335
331
  }
336
332
 
337
- // OFFSET handling
338
- if (rowsSkipped < offset) {
333
+ // For non-DISTINCT queries, we can skip rows before projection (optimization)
334
+ if (!seen && rowsSkipped < offset) {
339
335
  rowsSkipped++
340
336
  continue
341
337
  }
342
338
 
343
- // LIMIT handling
344
- if (rowsYielded >= limit) {
345
- break
346
- }
347
-
348
339
  // SELECT projection
349
- /** @type {Record<string, any>} */
340
+ /** @type {AsyncRow} */
350
341
  const outRow = {}
351
342
  for (const col of select.columns) {
352
343
  if (col.kind === 'star') {
353
- const keys = row.getKeys()
354
- for (const key of keys) {
355
- outRow[key] = row.getCell(key)
344
+ for (const [key, cell] of Object.entries(row)) {
345
+ outRow[key] = cell
356
346
  }
357
347
  } else if (col.kind === 'derived') {
358
348
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
359
- outRow[alias] = await evaluateExpr({ node: col.expr, row, tables })
349
+ outRow[alias] = () => evaluateExpr({ node: col.expr, row, tables })
360
350
  } else if (col.kind === 'aggregate') {
361
351
  throw new Error(
362
352
  'Aggregate functions require GROUP BY or will act on the whole dataset; add GROUP BY or remove aggregates'
@@ -364,31 +354,46 @@ async function* evaluateStreaming(select, dataSource, tables) {
364
354
  }
365
355
  }
366
356
 
357
+ // DISTINCT: skip duplicate rows
358
+ if (seen) {
359
+ const key = await stableRowKey(outRow)
360
+ if (seen.has(key)) continue
361
+ seen.add(key)
362
+ // OFFSET applies to distinct rows
363
+ if (rowsSkipped < offset) {
364
+ rowsSkipped++
365
+ continue
366
+ }
367
+ }
368
+
367
369
  yield outRow
368
370
  rowsYielded++
371
+ if (rowsYielded >= limit) {
372
+ break
373
+ }
369
374
  }
370
375
  }
371
376
 
372
377
  /**
373
- * Buffered evaluation for complex queries (with ORDER BY, DISTINCT, or GROUP BY)
378
+ * Buffered evaluation for complex queries (with ORDER BY or GROUP BY)
374
379
  *
375
380
  * @param {SelectStatement} select
376
381
  * @param {AsyncDataSource} dataSource
377
382
  * @param {Record<string, AsyncDataSource>} tables
378
383
  * @param {boolean} hasAggregate
379
384
  * @param {boolean} useGrouping
380
- * @returns {AsyncGenerator<Record<string, any>>}
385
+ * @returns {AsyncGenerator<AsyncRow>}
381
386
  */
382
387
  async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGrouping) {
383
388
  // Step 1: Collect all rows from data source
384
- /** @type {RowSource[]} */
389
+ /** @type {AsyncRow[]} */
385
390
  const working = []
386
391
  for await (const row of dataSource.getRows()) {
387
392
  working.push(row)
388
393
  }
389
394
 
390
395
  // Step 2: WHERE clause filtering
391
- /** @type {RowSource[]} */
396
+ /** @type {AsyncRow[]} */
392
397
  const filtered = []
393
398
 
394
399
  for (const row of working) {
@@ -403,16 +408,16 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
403
408
  }
404
409
 
405
410
  // Step 3: Projection (grouping vs non-grouping)
406
- /** @type {Record<string, any>[]} */
411
+ /** @type {AsyncRow[]} */
407
412
  let projected = []
408
413
 
409
414
  if (useGrouping) {
410
415
  // Grouping due to GROUP BY or aggregate functions
411
- /** @type {RowSource[][]} */
416
+ /** @type {AsyncRow[][]} */
412
417
  const groups = []
413
418
 
414
- if (select.groupBy?.length) {
415
- /** @type {Map<string, RowSource[]>} */
419
+ if (select.groupBy.length) {
420
+ /** @type {Map<string, AsyncRow[]>} */
416
421
  const map = new Map()
417
422
  for (const row of filtered) {
418
423
  /** @type {string[]} */
@@ -440,15 +445,14 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
440
445
  }
441
446
 
442
447
  for (const group of groups) {
443
- /** @type {Record<string, any>} */
448
+ /** @type {AsyncRow} */
444
449
  const resultRow = {}
445
450
  for (const col of select.columns) {
446
451
  if (col.kind === 'star') {
447
452
  const firstRow = group[0]
448
453
  if (firstRow) {
449
- const keys = firstRow.getKeys()
450
- for (const key of keys) {
451
- resultRow[key] = firstRow.getCell(key)
454
+ for (const [key, cell] of Object.entries(firstRow)) {
455
+ resultRow[key] = cell
452
456
  }
453
457
  }
454
458
  continue
@@ -457,18 +461,16 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
457
461
  if (col.kind === 'derived') {
458
462
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
459
463
  if (group.length > 0) {
460
- const value = await evaluateExpr({ node: col.expr, row: group[0], tables })
461
- resultRow[alias] = value
464
+ resultRow[alias] = () => evaluateExpr({ node: col.expr, row: group[0], tables })
462
465
  } else {
463
- resultRow[alias] = undefined
466
+ delete resultRow[alias]
464
467
  }
465
468
  continue
466
469
  }
467
470
 
468
471
  if (col.kind === 'aggregate') {
469
472
  const alias = col.alias ?? defaultAggregateAlias(col)
470
- const value = await evaluateAggregate(col, group)
471
- resultRow[alias] = value
473
+ resultRow[alias] = () => evaluateAggregate({ col, rows: group, tables })
472
474
  continue
473
475
  }
474
476
  }
@@ -487,19 +489,26 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
487
489
  // Sort before projection so ORDER BY can access columns not in SELECT
488
490
  const sorted = await sortRowSources(filtered, select.orderBy, tables)
489
491
 
490
- for (const row of sorted) {
491
- /** @type {Record<string, any>} */
492
+ // OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
493
+ // to avoid reading expensive cells for rows that won't be in the final result
494
+ let rowsToProject = sorted
495
+ if (!select.distinct) {
496
+ const start = select.offset ?? 0
497
+ const end = select.limit ? start + select.limit : sorted.length
498
+ rowsToProject = sorted.slice(start, end)
499
+ }
500
+
501
+ for (const row of rowsToProject) {
502
+ /** @type {AsyncRow} */
492
503
  const outRow = {}
493
504
  for (const col of select.columns) {
494
505
  if (col.kind === 'star') {
495
- const keys = row.getKeys()
496
- for (const key of keys) {
497
- outRow[key] = row.getCell(key)
506
+ for (const [key, cell] of Object.entries(row)) {
507
+ outRow[key] = cell
498
508
  }
499
509
  } else if (col.kind === 'derived') {
500
510
  const alias = col.alias ?? defaultDerivedAlias(col.expr)
501
- const value = await evaluateExpr({ node: col.expr, row, tables })
502
- outRow[alias] = value
511
+ outRow[alias] = () => evaluateExpr({ node: col.expr, row, tables })
503
512
  }
504
513
  }
505
514
  projected.push(outRow)
@@ -507,17 +516,25 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
507
516
  }
508
517
 
509
518
  // Step 4: DISTINCT
510
- projected = applyDistinct(projected, select.distinct)
519
+ projected = await applyDistinct(projected, select.distinct)
511
520
 
512
521
  // Step 5: ORDER BY (final sort for grouped queries)
513
522
  projected = await applyOrderBy(projected, select.orderBy, tables)
514
523
 
515
524
  // Step 6: OFFSET and LIMIT
516
- const start = select.offset ?? 0
517
- const end = select.limit ? start + select.limit : projected.length
518
-
519
- // Step 7: Yield results
520
- for (let i = start; i < end && i < projected.length; i++) {
521
- yield projected[i]
525
+ // For non-DISTINCT, non-grouping queries, OFFSET/LIMIT was already applied before projection
526
+ if (select.distinct || useGrouping) {
527
+ const start = select.offset ?? 0
528
+ const end = select.limit ? start + select.limit : projected.length
529
+
530
+ // Step 7: Yield results
531
+ for (let i = start; i < end && i < projected.length; i++) {
532
+ yield projected[i]
533
+ }
534
+ } else {
535
+ // Already limited, yield all projected rows
536
+ for (const row of projected) {
537
+ yield row
538
+ }
522
539
  }
523
540
  }
@@ -1,8 +1,7 @@
1
1
  import { executeSelect } from './execute.js'
2
- import { collect } from './utils.js'
3
2
 
4
3
  /**
5
- * @import { ExprNode, RowSource, SqlPrimitive, AsyncDataSource } from '../types.js'
4
+ * @import { ExprNode, AsyncRow, SqlPrimitive, AsyncDataSource } from '../types.js'
6
5
  */
7
6
 
8
7
  /**
@@ -10,8 +9,8 @@ import { collect } from './utils.js'
10
9
  *
11
10
  * @param {Object} params
12
11
  * @param {ExprNode} params.node - The expression node to evaluate
13
- * @param {RowSource} params.row - The data row to evaluate against
14
- * @param {Record<string, AsyncDataSource>} [params.tables]
12
+ * @param {AsyncRow} params.row - The data row to evaluate against
13
+ * @param {Record<string, AsyncDataSource>} params.tables
15
14
  * @returns {Promise<SqlPrimitive>} The result of the evaluation
16
15
  */
17
16
  export async function evaluateExpr({ node, row, tables }) {
@@ -20,7 +19,19 @@ export async function evaluateExpr({ node, row, tables }) {
20
19
  }
21
20
 
22
21
  if (node.type === 'identifier') {
23
- return row.getCell(node.name)
22
+ return row[node.name]?.()
23
+ }
24
+
25
+ // Scalar subquery - returns a single value
26
+ if (node.type === 'subquery') {
27
+ const gen = executeSelect(node.subquery, tables)
28
+ const first = await gen.next() // Start the generator
29
+ gen.return() // Stop further execution
30
+ if (first.done) return null
31
+ /** @type {AsyncRow} */
32
+ const firstRow = first.value
33
+ const firstKey = Object.keys(firstRow)[0]
34
+ return firstRow[firstKey]()
24
35
  }
25
36
 
26
37
  // Unary operators
@@ -228,29 +239,37 @@ export async function evaluateExpr({ node, row, tables }) {
228
239
  // IN and NOT IN with subqueries
229
240
  if (node.type === 'in') {
230
241
  const exprVal = await evaluateExpr({ node: node.expr, row, tables })
231
- const results = await collect(executeSelect(node.subquery, tables))
232
- if (results.length === 0) return false
233
- const firstKey = Object.keys(results[0])[0]
234
- const values = results.map(r => r[firstKey])
242
+ const results = executeSelect(node.subquery, tables)
243
+ /** @type {SqlPrimitive[]} */
244
+ const values = []
245
+ for await (const resRow of results) {
246
+ const firstKey = Object.keys(resRow)[0]
247
+ const val = await resRow[firstKey]()
248
+ values.push(val)
249
+ }
235
250
  return values.includes(exprVal)
236
251
  }
237
252
  if (node.type === 'not in') {
238
253
  const exprVal = await evaluateExpr({ node: node.expr, row, tables })
239
- const results = await collect(executeSelect(node.subquery, tables))
240
- if (results.length === 0) return true
241
- const firstKey = Object.keys(results[0])[0]
242
- const values = results.map(r => r[firstKey])
254
+ const results = executeSelect(node.subquery, tables)
255
+ /** @type {SqlPrimitive[]} */
256
+ const values = []
257
+ for await (const resRow of results) {
258
+ const firstKey = Object.keys(resRow)[0]
259
+ const val = await resRow[firstKey]()
260
+ values.push(val)
261
+ }
243
262
  return !values.includes(exprVal)
244
263
  }
245
264
 
246
265
  // EXISTS and NOT EXISTS with subqueries
247
266
  if (node.type === 'exists') {
248
- const results = await collect(executeSelect(node.subquery, tables))
249
- return results.length > 0
267
+ const results = await executeSelect(node.subquery, tables).next()
268
+ return results.done === false
250
269
  }
251
270
  if (node.type === 'not exists') {
252
- const results = await collect(executeSelect(node.subquery, tables))
253
- return results.length === 0
271
+ const results = await executeSelect(node.subquery, tables).next()
272
+ return results.done === true
254
273
  }
255
274
 
256
275
  // CASE expressions
@@ -1,5 +1,5 @@
1
1
  /**
2
- * @import { AggregateFunc, AsyncDataSource, ExprNode, RowSource, SqlPrimitive } from '../types.js'
2
+ * @import { AggregateFunc, AsyncDataSource, ExprNode, AsyncRow, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
5
  import { isAggregateFunc } from '../validation.js'
@@ -8,32 +8,17 @@ import { evaluateExpr } from './expression.js'
8
8
  /**
9
9
  * Creates a context for evaluating HAVING expressions
10
10
  *
11
- * @param {Record<string, any>} resultRow - the aggregated result row
12
- * @param {RowSource[]} group - the group of rows
13
- * @returns {RowSource} a context row for HAVING evaluation
11
+ * @param {AsyncRow} resultRow - the aggregated result row
12
+ * @param {AsyncRow[]} group - the group of rows
13
+ * @returns {AsyncRow} a context row for HAVING evaluation
14
14
  */
15
15
  function createHavingContext(resultRow, group) {
16
16
  // Include the first row of the group (for GROUP BY columns)
17
17
  const firstRow = group[0]
18
- /** @type {Record<string, any>} */
19
- const context = {}
20
18
  if (firstRow) {
21
- const keys = firstRow.getKeys()
22
- for (const key of keys) {
23
- context[key] = firstRow.getCell(key)
24
- }
25
- }
26
- // Merge with result row (which has aggregates computed)
27
- Object.assign(context, resultRow)
28
-
29
- // Return a Row accessor wrapping the context
30
- return {
31
- getCell(name) {
32
- return context[name]
33
- },
34
- getKeys() {
35
- return Object.keys(context)
36
- },
19
+ return { ...firstRow, ...resultRow }
20
+ } else {
21
+ return resultRow
37
22
  }
38
23
  }
39
24
 
@@ -41,8 +26,8 @@ function createHavingContext(resultRow, group) {
41
26
  * Evaluates a HAVING expression with support for aggregate functions
42
27
  *
43
28
  * @param {ExprNode} expr - the HAVING expression
44
- * @param {Record<string, any>} row - the aggregated result row
45
- * @param {RowSource[]} group - the group of rows for re-evaluating aggregates
29
+ * @param {AsyncRow} row - the aggregated result row
30
+ * @param {AsyncRow[]} group - the group of rows for re-evaluating aggregates
46
31
  * @param {Record<string, AsyncDataSource>} tables
47
32
  * @returns {Promise<boolean>} whether the HAVING condition is satisfied
48
33
  */
@@ -129,8 +114,8 @@ export async function evaluateHavingExpr(expr, row, group, tables) {
129
114
  * Evaluates a value in a HAVING expression
130
115
  *
131
116
  * @param {ExprNode} expr
132
- * @param {RowSource} context - the context row
133
- * @param {RowSource[]} group - the group of rows
117
+ * @param {AsyncRow} context - the context row
118
+ * @param {AsyncRow[]} group - the group of rows
134
119
  * @param {Record<string, AsyncDataSource>} tables
135
120
  * @returns {Promise<SqlPrimitive>} the evaluated value
136
121
  */
@@ -155,7 +140,7 @@ function evaluateHavingValue(expr, context, group, tables) {
155
140
  *
156
141
  * @param {AggregateFunc} funcName - aggregate function name
157
142
  * @param {ExprNode[]} args - function arguments
158
- * @param {RowSource[]} group - the group of rows
143
+ * @param {AsyncRow[]} group - the group of rows
159
144
  * @param {Record<string, AsyncDataSource>} tables
160
145
  * @returns {Promise<SqlPrimitive>} the aggregate result
161
146
  */
@@ -1,13 +1,19 @@
1
1
  /**
2
- * Collects all results from an async generator into an array
2
+ * Collects and materializes all results from an async row generator into an array
3
3
  *
4
- * @template T
5
- * @param {AsyncGenerator<T>} asyncGen - the async generator
6
- * @returns {Promise<T[]>} array of all yielded values
4
+ * @import {AsyncRow, SqlPrimitive} from '../types.js'
5
+ * @param {AsyncGenerator<AsyncRow>} asyncRows
6
+ * @returns {Promise<Record<string, SqlPrimitive>[]>} array of materialized rows, one plain object per yielded row
7
7
  */
8
- export async function collect(asyncGen) {
8
+ export async function collect(asyncRows) {
9
+ /** @type {Record<string, SqlPrimitive>[]} */
9
10
  const results = []
10
- for await (const item of asyncGen) {
11
+ for await (const asyncRow of asyncRows) {
12
+ /** @type {Record<string, SqlPrimitive>} */
13
+ const item = {}
14
+ for (const [key, cell] of Object.entries(asyncRow)) {
15
+ item[key] = await cell()
16
+ }
11
17
  results.push(item)
12
18
  }
13
19
  return results
package/src/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { ExecuteSqlOptions, SelectStatement } from './types.js'
1
+ import type { AsyncRow, ExecuteSqlOptions, SelectStatement, SqlPrimitive } from './types.js'
2
2
 
3
3
  /**
4
4
  * Executes a SQL SELECT query against an array of data rows
@@ -8,12 +8,12 @@ import type { ExecuteSqlOptions, SelectStatement } from './types.js'
8
8
  * @param options.query - SQL query string
9
9
  * @returns async generator yielding rows matching the query
10
10
  */
11
- export function executeSql(options: ExecuteSqlOptions): AsyncGenerator<Record<string, any>>
11
+ export function executeSql(options: ExecuteSqlOptions): AsyncGenerator<AsyncRow>
12
12
 
13
13
  /**
14
14
  * Parses a SQL query string into an abstract syntax tree
15
15
  *
16
- * @param sql - SQL query string to parse
16
+ * @param query - SQL query string to parse
17
17
  * @returns parsed SQL select statement
18
18
  */
19
19
  export function parseSql(query: string): SelectStatement
@@ -24,4 +24,4 @@ export function parseSql(query: string): SelectStatement
24
24
  * @param asyncGen - the async generator
25
25
  * @returns array of all yielded values
26
26
  */
27
- export function collect<T>(asyncGen: AsyncGenerator<T>): Promise<T[]>
27
+ export function collect<T>(asyncGen: AsyncGenerator<AsyncRow>): Promise<Record<string, SqlPrimitive>[]>
@@ -20,6 +20,17 @@ function parsePrimary(c) {
20
20
  const tok = c.current()
21
21
 
22
22
  if (tok.type === 'paren' && tok.value === '(') {
23
+ // Peek ahead to see if this is a scalar subquery
24
+ const nextTok = c.peek(1)
25
+ if (nextTok.type === 'keyword' && nextTok.value === 'SELECT') {
26
+ // It's a scalar subquery
27
+ const subquery = c.parseSubquery()
28
+ return {
29
+ type: 'subquery',
30
+ subquery,
31
+ }
32
+ }
33
+ // Regular grouped expression
23
34
  c.consume()
24
35
  const expr = parseExpression(c)
25
36
  c.expect('paren', ')')
@@ -132,9 +143,6 @@ function parsePrimary(c) {
132
143
  }
133
144
  if (tok.value === 'EXISTS') {
134
145
  c.consume() // EXISTS
135
- if (!c.parseSubquery) {
136
- throw new Error('Subquery parsing not available in this context')
137
- }
138
146
  const subquery = c.parseSubquery()
139
147
  return {
140
148
  type: 'exists',
package/src/types.d.ts CHANGED
@@ -4,17 +4,15 @@
4
4
  * Provides an async iterator over rows.
5
5
  */
6
6
  export interface AsyncDataSource {
7
- getRows(): AsyncIterable<RowSource>
8
- }
9
- export interface RowSource {
10
- getCell(name: string): any
11
- getKeys(): string[]
7
+ getRows(): AsyncIterable<AsyncRow>
12
8
  }
9
+ export type AsyncRow = Record<string, AsyncCell>
10
+ export type AsyncCell = () => Promise<SqlPrimitive>
13
11
 
14
- export type RawData = Record<string, any>[]
12
+ export type Row = Record<string, SqlPrimitive>[]
15
13
 
16
14
  export interface ExecuteSqlOptions {
17
- tables: Record<string, RawData | AsyncDataSource>
15
+ tables: Record<string, Row | AsyncDataSource>
18
16
  query: string
19
17
  }
20
18
 
@@ -23,7 +21,7 @@ export type SqlPrimitive = string | number | bigint | boolean | null
23
21
  export interface SelectStatement {
24
22
  distinct: boolean
25
23
  columns: SelectColumn[]
26
- from?: string | FromSubquery
24
+ from: string | FromSubquery
27
25
  joins: JoinClause[]
28
26
  where?: ExprNode
29
27
  groupBy: ExprNode[]
@@ -122,6 +120,11 @@ export interface CaseNode {
122
120
  elseResult?: ExprNode
123
121
  }
124
122
 
123
+ export interface SubqueryNode {
124
+ type: 'subquery'
125
+ subquery: SelectStatement
126
+ }
127
+
125
128
  export type ExprNode =
126
129
  | LiteralNode
127
130
  | IdentifierNode
@@ -134,6 +137,7 @@ export type ExprNode =
134
137
  | InValuesNode
135
138
  | ExistsNode
136
139
  | CaseNode
140
+ | SubqueryNode
137
141
 
138
142
  export interface StarColumn {
139
143
  kind: 'star'
@@ -197,7 +201,7 @@ export interface ExprCursor {
197
201
  match(type: TokenType, value?: string): boolean
198
202
  expect(type: TokenType, value: string): Token
199
203
  expectIdentifier(): Token
200
- parseSubquery?: () => SelectStatement
204
+ parseSubquery: () => SelectStatement
201
205
  }
202
206
 
203
207
  // Tokenizer types
@@ -1,36 +0,0 @@
1
- /**
2
- * @import { AsyncDataSource, RowSource } from '../types.js'
3
- */
4
-
5
- /**
6
- * Creates a row accessor that wraps a plain JavaScript object
7
- *
8
- * @param {Record<string, any>} obj - the plain object
9
- * @returns {RowSource} a row accessor interface
10
- */
11
- export function createRowAccessor(obj) {
12
- return {
13
- getCell(name) {
14
- return obj[name]
15
- },
16
- getKeys() {
17
- return Object.keys(obj)
18
- },
19
- }
20
- }
21
-
22
- /**
23
- * Creates an async memory-backed data source from an array of plain objects
24
- *
25
- * @param {Record<string, any>[]} data - array of plain objects
26
- * @returns {AsyncDataSource} an async data source interface
27
- */
28
- export function createAsyncMemorySource(data) {
29
- return {
30
- async *getRows() {
31
- for (const item of data) {
32
- yield createRowAccessor(item)
33
- }
34
- },
35
- }
36
- }