squirreling 0.3.0 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,10 +7,10 @@
7
7
  [![minzipped](https://img.shields.io/bundlephobia/minzip/squirreling)](https://www.npmjs.com/package/squirreling)
8
8
  [![workflow status](https://github.com/hyparam/squirreling/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/squirreling/actions)
9
9
  [![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
10
- ![coverage](https://img.shields.io/badge/Coverage-90-darkred)
10
+ ![coverage](https://img.shields.io/badge/Coverage-93-darkred)
11
11
  [![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/squirreling?activeTab=dependencies)
12
12
 
13
- Squirreling is a streaming async SQL engine for JavaScript. It is designed to provide efficient streaming of results from pluggable backend for highly efficient retrieval of data for browser applications.
13
+ Squirreling is a streaming async SQL engine for JavaScript. It is designed to stream results efficiently from pluggable backends, enabling highly efficient retrieval of data for browser applications.
14
14
 
15
15
  ## Features
16
16
 
@@ -22,6 +22,8 @@ Squirreling is a streaming async SQL engine for JavaScript. It is designed to pr
22
22
  - Constant memory usage for simple queries with LIMIT
23
23
  - Robust error handling and validation designed for LLM tool use
24
24
  - In-memory data option for simple use cases
25
+ - Supports SELECT statements only
26
+ - JOINs are not supported (yet)
25
27
 
26
28
  ## Usage
27
29
 
@@ -39,18 +41,19 @@ const users = [
39
41
  ]
40
42
 
41
43
  // Process rows as they arrive (streaming)
42
- for await (const user of executeSql({
44
+ for await (const { cnt } of executeSql({
43
45
  tables: { users },
44
- query: 'SELECT * FROM users WHERE active = TRUE LIMIT 100',
46
+ query: 'SELECT count(*) as cnt FROM users WHERE active = TRUE LIMIT 10',
45
47
  })) {
46
- console.log(user.name)
48
+ console.log('Count', cnt)
47
49
  }
48
50
  ```
49
51
 
50
52
  There is an exported helper function `collect` to gather all rows into an array if needed:
51
53
 
52
54
  ```javascript
53
- import { collect } from 'squirreling'
55
+ import { collect, executeSql } from 'squirreling'
56
+
54
57
  const allUsers = await collect(executeSql({
55
58
  tables: { users },
56
59
  query: 'SELECT * FROM users',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "Squirreling SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -0,0 +1,53 @@
1
+ /**
2
+ * @import { AsyncDataSource, AsyncRow } from '../types.js'
3
+ */
4
+
5
+
6
+ /**
7
+ * Wraps an async generator of plain objects into an AsyncDataSource
8
+ *
9
+ * @param {AsyncGenerator<Record<string, any>>} gen
10
+ * @returns {AsyncDataSource}
11
+ */
12
+ export function generatorSource(gen) {
13
+ return {
14
+ async *getRows() {
15
+ for await (const row of gen) {
16
+ yield asyncRow(row)
17
+ }
18
+ },
19
+ }
20
+ }
21
+
22
+ /**
23
+ * Creates an async row accessor that wraps a plain JavaScript object
24
+ *
25
+ * @param {Record<string, any>} obj - the plain object
26
+ * @returns {AsyncRow} a row accessor interface
27
+ */
28
+ export function asyncRow(obj) {
29
+ return {
30
+ getCell(name) {
31
+ return obj[name]
32
+ },
33
+ getKeys() {
34
+ return Object.keys(obj)
35
+ },
36
+ }
37
+ }
38
+
39
+ /**
40
+ * Creates an async memory-backed data source from an array of plain objects
41
+ *
42
+ * @param {Record<string, any>[]} data - array of plain objects
43
+ * @returns {AsyncDataSource} an async data source interface
44
+ */
45
+ export function memorySource(data) {
46
+ return {
47
+ async *getRows() {
48
+ for (const item of data) {
49
+ yield asyncRow(item)
50
+ }
51
+ },
52
+ }
53
+ }
@@ -3,9 +3,9 @@ import { evaluateExpr } from './expression.js'
3
3
  /**
4
4
  * Evaluates an aggregate function over a set of rows
5
5
  *
6
- * @import { AggregateColumn, ExprNode, RowSource } from '../types.js'
6
+ * @import { AggregateColumn, ExprNode, AsyncRow } from '../types.js'
7
7
  * @param {AggregateColumn} col - aggregate column definition
8
- * @param {RowSource[]} rows - rows to aggregate
8
+ * @param {AsyncRow[]} rows - rows to aggregate
9
9
  * @returns {Promise<number | null>} aggregated result
10
10
  */
11
11
  export async function evaluateAggregate(col, rows) {
@@ -1,12 +1,11 @@
1
1
  import { evaluateExpr } from './expression.js'
2
2
  import { parseSql } from '../parse/parse.js'
3
- import { createAsyncMemorySource, createRowAccessor } from '../backend/memory.js'
3
+ import { asyncRow, generatorSource, memorySource } from '../backend/dataSource.js'
4
4
  import { defaultAggregateAlias, evaluateAggregate } from './aggregates.js'
5
5
  import { evaluateHavingExpr } from './having.js'
6
- import { collect } from './utils.js'
7
6
 
8
7
  /**
9
- * @import { AsyncDataSource, ExecuteSqlOptions, ExprNode, OrderByItem, RowSource, SelectStatement, SqlPrimitive } from '../types.js'
8
+ * @import { AsyncDataSource, ExecuteSqlOptions, ExprNode, OrderByItem, AsyncRow, SelectStatement, SqlPrimitive } from '../types.js'
10
9
  */
11
10
 
12
11
  /**
@@ -31,7 +30,7 @@ export async function* executeSql({ tables, query }) {
31
30
  const normalizedTables = {}
32
31
  for (const [name, source] of Object.entries(tables)) {
33
32
  if (Array.isArray(source)) {
34
- normalizedTables[name] = createAsyncMemorySource(source)
33
+ normalizedTables[name] = memorySource(source)
35
34
  } else {
36
35
  normalizedTables[name] = source
37
36
  }
@@ -60,8 +59,7 @@ export async function* executeSelect(select, tables) {
60
59
  dataSource = table
61
60
  } else {
62
61
  // Nested subquery - recursively resolve
63
- const derivedData = await collect(executeSelect(select.from.query, tables))
64
- dataSource = createAsyncMemorySource(derivedData)
62
+ dataSource = generatorSource(executeSelect(select.from.query, tables))
65
63
  }
66
64
 
67
65
  yield* evaluateSelectAst(select, dataSource, tables)
@@ -162,13 +160,13 @@ function applyDistinct(rows, distinct) {
162
160
  /**
163
161
  * Applies ORDER BY sorting to an AsyncRow array (before projection)
164
162
  *
165
- * @param {RowSource[]} rows - the input row sources
163
+ * @param {AsyncRow[]} rows - the input row sources
166
164
  * @param {OrderByItem[]} orderBy - the sort specifications
167
165
  * @param {Record<string, AsyncDataSource>} tables
168
- * @returns {Promise<RowSource[]>} the sorted row sources
166
+ * @returns {Promise<AsyncRow[]>} the sorted row sources
169
167
  */
170
168
  async function sortRowSources(rows, orderBy, tables) {
171
- if (!orderBy?.length) return rows
169
+ if (!orderBy.length) return rows
172
170
 
173
171
  // Pre-evaluate ORDER BY expressions for all rows
174
172
  /** @type {SqlPrimitive[][]} */
@@ -229,7 +227,7 @@ async function sortRowSources(rows, orderBy, tables) {
229
227
  * @returns {Promise<Record<string, any>[]>} the sorted rows
230
228
  */
231
229
  async function applyOrderBy(rows, orderBy, tables) {
232
- if (!orderBy?.length) return rows
230
+ if (!orderBy.length) return rows
233
231
 
234
232
  // Pre-evaluate ORDER BY expressions for all rows
235
233
  /** @type {SqlPrimitive[][]} */
@@ -238,7 +236,7 @@ async function applyOrderBy(rows, orderBy, tables) {
238
236
  /** @type {SqlPrimitive[]} */
239
237
  const rowValues = []
240
238
  for (const term of orderBy) {
241
- const value = await evaluateExpr({ node: term.expr, row: createRowAccessor(row), tables })
239
+ const value = await evaluateExpr({ node: term.expr, row: asyncRow(row), tables })
242
240
  rowValues.push(value)
243
241
  }
244
242
  evaluatedValues.push(rowValues)
@@ -293,13 +291,8 @@ async function* evaluateSelectAst(select, dataSource, tables) {
293
291
  // SQL priority: from, where, group by, having, select, order by, offset, limit
294
292
 
295
293
  const hasAggregate = select.columns.some(col => col.kind === 'aggregate')
296
- const useGrouping = hasAggregate || select.groupBy?.length > 0
297
-
298
- // Determine if we need to buffer (collect all rows first)
299
- const needsBuffering =
300
- select.orderBy.length > 0 ||
301
- select.distinct ||
302
- useGrouping
294
+ const useGrouping = hasAggregate || select.groupBy.length > 0
295
+ const needsBuffering = useGrouping || select.orderBy.length > 0
303
296
 
304
297
  if (needsBuffering) {
305
298
  // BUFFERING PATH: Collect all rows, process, then yield
@@ -311,7 +304,8 @@ async function* evaluateSelectAst(select, dataSource, tables) {
311
304
  }
312
305
 
313
306
  /**
314
- * Streaming evaluation for simple queries (no ORDER BY, DISTINCT, or GROUP BY)
307
+ * Streaming evaluation for simple queries (no ORDER BY or GROUP BY)
308
+ * Supports DISTINCT by tracking seen row keys without buffering full rows
315
309
  *
316
310
  * @param {SelectStatement} select
317
311
  * @param {AsyncDataSource} dataSource
@@ -323,28 +317,25 @@ async function* evaluateStreaming(select, dataSource, tables) {
323
317
  let rowsSkipped = 0
324
318
  const offset = select.offset ?? 0
325
319
  const limit = select.limit ?? Infinity
320
+ if (limit <= 0) return
321
+
322
+ // For DISTINCT, track seen row keys
323
+ /** @type {Set<string> | undefined} */
324
+ const seen = select.distinct ? new Set() : undefined
326
325
 
327
326
  for await (const row of dataSource.getRows()) {
328
327
  // WHERE filter
329
328
  if (select.where) {
330
- const passes = await evaluateExpr({ node: select.where, row, tables })
331
-
332
- if (!passes) {
333
- continue
334
- }
329
+ const pass = await evaluateExpr({ node: select.where, row, tables })
330
+ if (!pass) continue
335
331
  }
336
332
 
337
- // OFFSET handling
338
- if (rowsSkipped < offset) {
333
+ // For non-DISTINCT queries, we can skip rows before projection (optimization)
334
+ if (!seen && rowsSkipped < offset) {
339
335
  rowsSkipped++
340
336
  continue
341
337
  }
342
338
 
343
- // LIMIT handling
344
- if (rowsYielded >= limit) {
345
- break
346
- }
347
-
348
339
  // SELECT projection
349
340
  /** @type {Record<string, any>} */
350
341
  const outRow = {}
@@ -364,13 +355,28 @@ async function* evaluateStreaming(select, dataSource, tables) {
364
355
  }
365
356
  }
366
357
 
358
+ // DISTINCT: skip duplicate rows
359
+ if (seen) {
360
+ const key = stableRowKey(outRow)
361
+ if (seen.has(key)) continue
362
+ seen.add(key)
363
+ // OFFSET applies to distinct rows
364
+ if (rowsSkipped < offset) {
365
+ rowsSkipped++
366
+ continue
367
+ }
368
+ }
369
+
367
370
  yield outRow
368
371
  rowsYielded++
372
+ if (rowsYielded >= limit) {
373
+ break
374
+ }
369
375
  }
370
376
  }
371
377
 
372
378
  /**
373
- * Buffered evaluation for complex queries (with ORDER BY, DISTINCT, or GROUP BY)
379
+ * Buffered evaluation for complex queries (with ORDER BY or GROUP BY)
374
380
  *
375
381
  * @param {SelectStatement} select
376
382
  * @param {AsyncDataSource} dataSource
@@ -381,14 +387,14 @@ async function* evaluateStreaming(select, dataSource, tables) {
381
387
  */
382
388
  async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGrouping) {
383
389
  // Step 1: Collect all rows from data source
384
- /** @type {RowSource[]} */
390
+ /** @type {AsyncRow[]} */
385
391
  const working = []
386
392
  for await (const row of dataSource.getRows()) {
387
393
  working.push(row)
388
394
  }
389
395
 
390
396
  // Step 2: WHERE clause filtering
391
- /** @type {RowSource[]} */
397
+ /** @type {AsyncRow[]} */
392
398
  const filtered = []
393
399
 
394
400
  for (const row of working) {
@@ -408,11 +414,11 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
408
414
 
409
415
  if (useGrouping) {
410
416
  // Grouping due to GROUP BY or aggregate functions
411
- /** @type {RowSource[][]} */
417
+ /** @type {AsyncRow[][]} */
412
418
  const groups = []
413
419
 
414
- if (select.groupBy?.length) {
415
- /** @type {Map<string, RowSource[]>} */
420
+ if (select.groupBy.length) {
421
+ /** @type {Map<string, AsyncRow[]>} */
416
422
  const map = new Map()
417
423
  for (const row of filtered) {
418
424
  /** @type {string[]} */
@@ -487,7 +493,16 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
487
493
  // Sort before projection so ORDER BY can access columns not in SELECT
488
494
  const sorted = await sortRowSources(filtered, select.orderBy, tables)
489
495
 
490
- for (const row of sorted) {
496
+ // OPTIMIZATION: For non-DISTINCT queries, apply OFFSET/LIMIT before projection
497
+ // to avoid reading expensive cells for rows that won't be in the final result
498
+ let rowsToProject = sorted
499
+ if (!select.distinct) {
500
+ const start = select.offset ?? 0
501
+ const end = select.limit ? start + select.limit : sorted.length
502
+ rowsToProject = sorted.slice(start, end)
503
+ }
504
+
505
+ for (const row of rowsToProject) {
491
506
  /** @type {Record<string, any>} */
492
507
  const outRow = {}
493
508
  for (const col of select.columns) {
@@ -513,11 +528,19 @@ async function* evaluateBuffered(select, dataSource, tables, hasAggregate, useGr
513
528
  projected = await applyOrderBy(projected, select.orderBy, tables)
514
529
 
515
530
  // Step 6: OFFSET and LIMIT
516
- const start = select.offset ?? 0
517
- const end = select.limit ? start + select.limit : projected.length
518
-
519
- // Step 7: Yield results
520
- for (let i = start; i < end && i < projected.length; i++) {
521
- yield projected[i]
531
+ // For non-DISTINCT, non-grouping queries, OFFSET/LIMIT was already applied before projection
532
+ if (select.distinct || useGrouping) {
533
+ const start = select.offset ?? 0
534
+ const end = select.limit ? start + select.limit : projected.length
535
+
536
+ // Step 7: Yield results
537
+ for (let i = start; i < end && i < projected.length; i++) {
538
+ yield projected[i]
539
+ }
540
+ } else {
541
+ // Already limited, yield all projected rows
542
+ for (const row of projected) {
543
+ yield row
544
+ }
522
545
  }
523
546
  }
@@ -2,7 +2,7 @@ import { executeSelect } from './execute.js'
2
2
  import { collect } from './utils.js'
3
3
 
4
4
  /**
5
- * @import { ExprNode, RowSource, SqlPrimitive, AsyncDataSource } from '../types.js'
5
+ * @import { ExprNode, AsyncRow, SqlPrimitive, AsyncDataSource } from '../types.js'
6
6
  */
7
7
 
8
8
  /**
@@ -10,7 +10,7 @@ import { collect } from './utils.js'
10
10
  *
11
11
  * @param {Object} params
12
12
  * @param {ExprNode} params.node - The expression node to evaluate
13
- * @param {RowSource} params.row - The data row to evaluate against
13
+ * @param {AsyncRow} params.row - The data row to evaluate against
14
14
  * @param {Record<string, AsyncDataSource>} [params.tables]
15
15
  * @returns {Promise<SqlPrimitive>} The result of the evaluation
16
16
  */
@@ -23,6 +23,16 @@ export async function evaluateExpr({ node, row, tables }) {
23
23
  return row.getCell(node.name)
24
24
  }
25
25
 
26
+ // Scalar subquery - returns a single value
27
+ if (node.type === 'subquery') {
28
+ const results = await collect(executeSelect(node.subquery, tables))
29
+ if (results.length === 0) return null
30
+ // Return the first column of the first row
31
+ const firstRow = results[0]
32
+ const firstKey = Object.keys(firstRow)[0]
33
+ return firstRow[firstKey]
34
+ }
35
+
26
36
  // Unary operators
27
37
  if (node.type === 'unary') {
28
38
  if (node.op === 'NOT') {
@@ -1,5 +1,5 @@
1
1
  /**
2
- * @import { AggregateFunc, AsyncDataSource, ExprNode, RowSource, SqlPrimitive } from '../types.js'
2
+ * @import { AggregateFunc, AsyncDataSource, ExprNode, AsyncRow, SqlPrimitive } from '../types.js'
3
3
  */
4
4
 
5
5
  import { isAggregateFunc } from '../validation.js'
@@ -9,8 +9,8 @@ import { evaluateExpr } from './expression.js'
9
9
  * Creates a context for evaluating HAVING expressions
10
10
  *
11
11
  * @param {Record<string, any>} resultRow - the aggregated result row
12
- * @param {RowSource[]} group - the group of rows
13
- * @returns {RowSource} a context row for HAVING evaluation
12
+ * @param {AsyncRow[]} group - the group of rows
13
+ * @returns {AsyncRow} a context row for HAVING evaluation
14
14
  */
15
15
  function createHavingContext(resultRow, group) {
16
16
  // Include the first row of the group (for GROUP BY columns)
@@ -42,7 +42,7 @@ function createHavingContext(resultRow, group) {
42
42
  *
43
43
  * @param {ExprNode} expr - the HAVING expression
44
44
  * @param {Record<string, any>} row - the aggregated result row
45
- * @param {RowSource[]} group - the group of rows for re-evaluating aggregates
45
+ * @param {AsyncRow[]} group - the group of rows for re-evaluating aggregates
46
46
  * @param {Record<string, AsyncDataSource>} tables
47
47
  * @returns {Promise<boolean>} whether the HAVING condition is satisfied
48
48
  */
@@ -129,8 +129,8 @@ export async function evaluateHavingExpr(expr, row, group, tables) {
129
129
  * Evaluates a value in a HAVING expression
130
130
  *
131
131
  * @param {ExprNode} expr
132
- * @param {RowSource} context - the context row
133
- * @param {RowSource[]} group - the group of rows
132
+ * @param {AsyncRow} context - the context row
133
+ * @param {AsyncRow[]} group - the group of rows
134
134
  * @param {Record<string, AsyncDataSource>} tables
135
135
  * @returns {Promise<SqlPrimitive>} the evaluated value
136
136
  */
@@ -155,7 +155,7 @@ function evaluateHavingValue(expr, context, group, tables) {
155
155
  *
156
156
  * @param {AggregateFunc} funcName - aggregate function name
157
157
  * @param {ExprNode[]} args - function arguments
158
- * @param {RowSource[]} group - the group of rows
158
+ * @param {AsyncRow[]} group - the group of rows
159
159
  * @param {Record<string, AsyncDataSource>} tables
160
160
  * @returns {Promise<SqlPrimitive>} the aggregate result
161
161
  */
@@ -20,6 +20,17 @@ function parsePrimary(c) {
20
20
  const tok = c.current()
21
21
 
22
22
  if (tok.type === 'paren' && tok.value === '(') {
23
+ // Peek ahead to see if this is a scalar subquery
24
+ const nextTok = c.peek(1)
25
+ if (nextTok.type === 'keyword' && nextTok.value === 'SELECT') {
26
+ // It's a scalar subquery
27
+ const subquery = c.parseSubquery()
28
+ return {
29
+ type: 'subquery',
30
+ subquery,
31
+ }
32
+ }
33
+ // Regular grouped expression
23
34
  c.consume()
24
35
  const expr = parseExpression(c)
25
36
  c.expect('paren', ')')
@@ -132,9 +143,6 @@ function parsePrimary(c) {
132
143
  }
133
144
  if (tok.value === 'EXISTS') {
134
145
  c.consume() // EXISTS
135
- if (!c.parseSubquery) {
136
- throw new Error('Subquery parsing not available in this context')
137
- }
138
146
  const subquery = c.parseSubquery()
139
147
  return {
140
148
  type: 'exists',
package/src/types.d.ts CHANGED
@@ -4,9 +4,9 @@
4
4
  * Provides an async iterator over rows.
5
5
  */
6
6
  export interface AsyncDataSource {
7
- getRows(): AsyncIterable<RowSource>
7
+ getRows(): AsyncIterable<AsyncRow>
8
8
  }
9
- export interface RowSource {
9
+ export interface AsyncRow {
10
10
  getCell(name: string): any
11
11
  getKeys(): string[]
12
12
  }
@@ -122,6 +122,11 @@ export interface CaseNode {
122
122
  elseResult?: ExprNode
123
123
  }
124
124
 
125
+ export interface SubqueryNode {
126
+ type: 'subquery'
127
+ subquery: SelectStatement
128
+ }
129
+
125
130
  export type ExprNode =
126
131
  | LiteralNode
127
132
  | IdentifierNode
@@ -134,6 +139,7 @@ export type ExprNode =
134
139
  | InValuesNode
135
140
  | ExistsNode
136
141
  | CaseNode
142
+ | SubqueryNode
137
143
 
138
144
  export interface StarColumn {
139
145
  kind: 'star'
@@ -197,7 +203,7 @@ export interface ExprCursor {
197
203
  match(type: TokenType, value?: string): boolean
198
204
  expect(type: TokenType, value: string): Token
199
205
  expectIdentifier(): Token
200
- parseSubquery?: () => SelectStatement
206
+ parseSubquery: () => SelectStatement
201
207
  }
202
208
 
203
209
  // Tokenizer types
@@ -1,36 +0,0 @@
1
- /**
2
- * @import { AsyncDataSource, RowSource } from '../types.js'
3
- */
4
-
5
- /**
6
- * Creates a row accessor that wraps a plain JavaScript object
7
- *
8
- * @param {Record<string, any>} obj - the plain object
9
- * @returns {RowSource} a row accessor interface
10
- */
11
- export function createRowAccessor(obj) {
12
- return {
13
- getCell(name) {
14
- return obj[name]
15
- },
16
- getKeys() {
17
- return Object.keys(obj)
18
- },
19
- }
20
- }
21
-
22
- /**
23
- * Creates an async memory-backed data source from an array of plain objects
24
- *
25
- * @param {Record<string, any>[]} data - array of plain objects
26
- * @returns {AsyncDataSource} an async data source interface
27
- */
28
- export function createAsyncMemorySource(data) {
29
- return {
30
- async *getRows() {
31
- for (const item of data) {
32
- yield createRowAccessor(item)
33
- }
34
- },
35
- }
36
- }