squirreling 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.10.1",
3
+ "version": "0.10.3",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -39,11 +39,11 @@
39
39
  "test": "vitest run"
40
40
  },
41
41
  "devDependencies": {
42
- "@types/node": "25.4.0",
43
- "@vitest/coverage-v8": "4.0.18",
42
+ "@types/node": "25.5.0",
43
+ "@vitest/coverage-v8": "4.1.0",
44
44
  "eslint": "9.39.2",
45
- "eslint-plugin-jsdoc": "62.7.1",
45
+ "eslint-plugin-jsdoc": "62.8.0",
46
46
  "typescript": "5.9.3",
47
- "vitest": "4.0.18"
47
+ "vitest": "4.1.0"
48
48
  }
49
49
  }
@@ -119,7 +119,7 @@ async function* executeScan(plan, context) {
119
119
 
120
120
  // Apply WHERE if data source did not
121
121
  if (!appliedWhere && plan.hints.where) {
122
- result = filterRows(result, plan.hints.where, context)
122
+ result = filterRows(result, plan.hints.where, context, plan.hints.limit)
123
123
  }
124
124
 
125
125
  // Apply LIMIT/OFFSET if data source did not
@@ -174,15 +174,42 @@ async function* executeCount(plan, { tables, signal }) {
174
174
  * @param {AsyncIterable<AsyncRow>} rows
175
175
  * @param {ExprNode} condition
176
176
  * @param {ExecuteContext} context
177
+ * @param {number} [limit] - downstream LIMIT hint for chunk sizing
177
178
  * @yields {AsyncRow}
178
179
  */
179
- async function* filterRows(rows, condition, context) {
180
+ async function* filterRows(rows, condition, context, limit) {
181
+ const MAX_CHUNK = 256
182
+ let chunkSize = limit ?? Infinity
180
183
  let rowIndex = 0
184
+
185
+ /** @type {{ row: AsyncRow, rowIndex: number }[]} */
186
+ let buffer = []
187
+
181
188
  for await (const row of rows) {
182
189
  if (context.signal?.aborted) return
183
190
  rowIndex++
184
- const pass = await evaluateExpr({ node: condition, row, rowIndex, context })
185
- if (pass) yield row
191
+ buffer.push({ row, rowIndex })
192
+
193
+ if (buffer.length >= chunkSize) {
194
+ const results = await Promise.all(buffer.map(b =>
195
+ evaluateExpr({ node: condition, row: b.row, rowIndex: b.rowIndex, context })
196
+ ))
197
+ for (let i = 0; i < buffer.length; i++) {
198
+ if (results[i]) yield buffer[i].row
199
+ }
200
+ buffer = []
201
+ chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
202
+ }
203
+ }
204
+
205
+ // Flush remaining rows
206
+ if (buffer.length > 0) {
207
+ const results = await Promise.all(buffer.map(b =>
208
+ evaluateExpr({ node: condition, row: b.row, rowIndex: b.rowIndex, context })
209
+ ))
210
+ for (let i = 0; i < buffer.length; i++) {
211
+ if (results[i]) yield buffer[i].row
212
+ }
186
213
  }
187
214
  }
188
215
 
@@ -275,17 +302,38 @@ async function* executeProject(plan, context) {
275
302
  */
276
303
  async function* executeDistinct(plan, context) {
277
304
  const { signal } = context
305
+ const MAX_CHUNK = 256
278
306
 
279
307
  /** @type {Set<string>} */
280
308
  const seen = new Set()
281
309
 
310
+ /** @type {AsyncRow[]} */
311
+ let buffer = []
312
+
282
313
  for await (const row of executePlan({ plan: plan.child, context })) {
283
314
  if (signal?.aborted) return
315
+ buffer.push(row)
284
316
 
285
- const key = await stableRowKey(row.cells)
286
- if (!seen.has(key)) {
287
- seen.add(key)
288
- yield row
317
+ if (buffer.length >= MAX_CHUNK) {
318
+ const keys = await Promise.all(buffer.map(r => stableRowKey(r.cells)))
319
+ for (let i = 0; i < buffer.length; i++) {
320
+ if (!seen.has(keys[i])) {
321
+ seen.add(keys[i])
322
+ yield buffer[i]
323
+ }
324
+ }
325
+ buffer = []
326
+ }
327
+ }
328
+
329
+ // Flush remaining
330
+ if (buffer.length > 0) {
331
+ const keys = await Promise.all(buffer.map(r => stableRowKey(r.cells)))
332
+ for (let i = 0; i < buffer.length; i++) {
333
+ if (!seen.has(keys[i])) {
334
+ seen.add(keys[i])
335
+ yield buffer[i]
336
+ }
289
337
  }
290
338
  }
291
339
  }
@@ -44,17 +44,22 @@ export function compareForTerm(a, b, term) {
44
44
  * @returns {Promise<Record<string, SqlPrimitive>[]>} array of all yielded values
45
45
  */
46
46
  export async function collect(asyncRows) {
47
- /** @type {Record<string, SqlPrimitive>[]} */
48
- const results = []
47
+ // Collect all rows first, then materialize cells concurrently
48
+ // This enables dataloader-style batching of cell accessors
49
+ /** @type {AsyncRow[]} */
50
+ const rows = []
49
51
  for await (const asyncRow of asyncRows) {
52
+ rows.push(asyncRow)
53
+ }
54
+ return Promise.all(rows.map(async asyncRow => {
55
+ const values = await Promise.all(asyncRow.columns.map(k => asyncRow.cells[k]()))
50
56
  /** @type {Record<string, SqlPrimitive>} */
51
57
  const item = {}
52
- for (const key of asyncRow.columns) {
53
- item[key] = await asyncRow.cells[key]()
58
+ for (let i = 0; i < asyncRow.columns.length; i++) {
59
+ item[asyncRow.columns[i]] = values[i]
54
60
  }
55
- results.push(item)
56
- }
57
- return results
61
+ return item
62
+ }))
58
63
  }
59
64
 
60
65
  /**
@@ -79,11 +84,6 @@ export function stringify(value) {
79
84
  */
80
85
  export async function stableRowKey(cells) {
81
86
  const keys = Object.keys(cells).sort()
82
- /** @type {string[]} */
83
- const parts = []
84
- for (const k of keys) {
85
- const v = await cells[k]()
86
- parts.push(k + ':' + stringify(v))
87
- }
88
- return parts.join('|')
87
+ const values = await Promise.all(keys.map(k => cells[k]()))
88
+ return keys.map((k, i) => k + ':' + stringify(values[i])).join('|')
89
89
  }
@@ -118,11 +118,10 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
118
118
  // Apply FILTER clause if present
119
119
  let filteredRows = rows
120
120
  if (node.filter) {
121
- filteredRows = []
122
- for (const row of rows) {
123
- const passes = await evaluateExpr({ node: node.filter, row, context })
124
- if (passes) filteredRows.push(row)
125
- }
121
+ const passes = await Promise.all(rows.map(row =>
122
+ evaluateExpr({ node: node.filter, row, context })
123
+ ))
124
+ filteredRows = rows.filter((_, i) => passes[i])
126
125
  }
127
126
 
128
127
  const argNode = node.args[0]
@@ -132,23 +131,27 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
132
131
  return filteredRows.length
133
132
  }
134
133
 
134
+ const values = await Promise.all(filteredRows.map(row =>
135
+ evaluateExpr({ node: argNode, row, context })
136
+ ))
135
137
  if (node.distinct) {
136
138
  const seen = new Set()
137
- for (const row of filteredRows) {
138
- const v = await evaluateExpr({ node: argNode, row, context })
139
+ for (const v of values) {
139
140
  if (v != null) seen.add(v)
140
141
  }
141
142
  return seen.size
142
143
  }
143
144
  let count = 0
144
- for (const row of filteredRows) {
145
- const v = await evaluateExpr({ node: argNode, row, context })
145
+ for (const v of values) {
146
146
  if (v != null) count++
147
147
  }
148
148
  return count
149
149
  }
150
150
 
151
151
  if (funcName === 'SUM' || funcName === 'AVG' || funcName === 'MIN' || funcName === 'MAX') {
152
+ const rawValues = await Promise.all(filteredRows.map(row =>
153
+ evaluateExpr({ node: argNode, row, context })
154
+ ))
152
155
  let sum = 0
153
156
  let count = 0
154
157
  /** @type {number | null} */
@@ -156,8 +159,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
156
159
  /** @type {number | null} */
157
160
  let max = null
158
161
 
159
- for (const row of filteredRows) {
160
- const raw = await evaluateExpr({ node: argNode, row, context })
162
+ for (const raw of rawValues) {
161
163
  if (raw == null) continue
162
164
  const num = Number(raw)
163
165
  if (!Number.isFinite(num)) continue
@@ -180,9 +182,12 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
180
182
  }
181
183
 
182
184
  if (funcName === 'STDDEV_SAMP' || funcName === 'STDDEV_POP') {
185
+ const rawValues = await Promise.all(filteredRows.map(row =>
186
+ evaluateExpr({ node: argNode, row, context })
187
+ ))
188
+ /** @type {number[]} */
183
189
  const values = []
184
- for (const row of filteredRows) {
185
- const raw = await evaluateExpr({ node: argNode, row, context })
190
+ for (const raw of rawValues) {
186
191
  if (raw == null) continue
187
192
  const num = Number(raw)
188
193
  if (!Number.isFinite(num)) continue
package/src/index.d.ts CHANGED
@@ -76,6 +76,8 @@ export function tokenizeSql(sql: string): Token[]
76
76
  */
77
77
  export function collect<T>(asyncGen: AsyncGenerator<AsyncRow>): Promise<Record<string, SqlPrimitive>[]>
78
78
 
79
+ export function asyncRow(row: Record<string, SqlPrimitive>, columns: string[]): AsyncRow
80
+
79
81
  export function cachedDataSource(source: AsyncDataSource): AsyncDataSource
80
82
 
81
83
  /**
package/src/index.js CHANGED
@@ -3,5 +3,5 @@ export { parseSql } from './parse/parse.js'
3
3
  export { planSql } from './plan/plan.js'
4
4
  export { tokenizeSql } from './parse/tokenize.js'
5
5
  export { collect } from './execute/utils.js'
6
- export { cachedDataSource } from './backend/dataSource.js'
6
+ export { asyncRow, cachedDataSource } from './backend/dataSource.js'
7
7
  export { derivedAlias } from './expression/alias.js'
@@ -1,11 +1,11 @@
1
1
  /**
2
- * @import { BBox, SimpleGeometry } from './geometry.js'
2
+ * @import { BoundingBox, SimpleGeometry } from './geometry.js'
3
3
  */
4
4
 
5
5
  export const EPSILON = 1e-10
6
6
  export const EPSILON_SQ = EPSILON * EPSILON
7
7
 
8
- /** @type {WeakMap<SimpleGeometry, BBox>} */
8
+ /** @type {WeakMap<SimpleGeometry, BoundingBox>} */
9
9
  const bboxCache = new WeakMap()
10
10
 
11
11
  /**
@@ -26,7 +26,7 @@ export function bboxOverlap(a, b) {
26
26
  * Results are cached per geometry object.
27
27
  *
28
28
  * @param {SimpleGeometry} geom
29
- * @returns {BBox}
29
+ * @returns {BoundingBox}
30
30
  */
31
31
  export function bbox(geom) {
32
32
  let b = bboxCache.get(geom)
@@ -15,7 +15,7 @@ export type Geometry =
15
15
  */
16
16
  export type SimpleGeometry = Point | LineString | Polygon
17
17
 
18
- export interface BBox {
18
+ export interface BoundingBox {
19
19
  minX: number
20
20
  minY: number
21
21
  maxX: number
@@ -0,0 +1,6 @@
1
+ import type { BoundingBox, Geometry, SimpleGeometry } from './geometry.js'
2
+
3
+ export function decompose(geom: Geometry): SimpleGeometry[]
4
+ export function bbox(geom: SimpleGeometry): BoundingBox
5
+ export function bboxOverlap(a: SimpleGeometry, b: SimpleGeometry): boolean
6
+ export function parseWkt(wkt: string): Geometry | null
@@ -0,0 +1,3 @@
1
+ export { decompose } from './spatial.js'
2
+ export { bbox, bboxOverlap } from './bbox.js'
3
+ export { parseWkt } from './wkt.js'
@@ -68,6 +68,25 @@ export function evaluateSpatialFunc({ funcName, args }) {
68
68
  }
69
69
  }
70
70
 
71
+ /**
72
+ * Decompose Multi* and GeometryCollection into simple geometries.
73
+ *
74
+ * @param {Geometry} geom
75
+ * @returns {SimpleGeometry[]}
76
+ */
77
+ export function decompose(geom) {
78
+ if (geom.type === 'MultiPoint') {
79
+ return geom.coordinates.map(c => ({ type: 'Point', coordinates: c }))
80
+ } else if (geom.type === 'MultiLineString') {
81
+ return geom.coordinates.map(c => ({ type: 'LineString', coordinates: c }))
82
+ } else if (geom.type === 'MultiPolygon') {
83
+ return geom.coordinates.map(c => ({ type: 'Polygon', coordinates: c }))
84
+ } else if (geom.type === 'GeometryCollection') {
85
+ return geom.geometries.flatMap(decompose)
86
+ }
87
+ return [geom]
88
+ }
89
+
71
90
  /**
72
91
  * Normalize a geometry value. Accepts GeoJSON objects.
73
92
  * Returns null if the value is not a valid geometry.
@@ -90,10 +109,6 @@ function toGeometry(val) {
90
109
  return null
91
110
  }
92
111
 
93
- // ============================================================================
94
- // Minimum distance between geometries
95
- // ============================================================================
96
-
97
112
  /**
98
113
  * Get all line segments from a geometry.
99
114
  *
@@ -165,35 +180,6 @@ function stDWithin(a, b, distance) {
165
180
  return false
166
181
  }
167
182
 
168
- // ============================================================================
169
- // Spatial predicate dispatch - decompose to primitive type pairs
170
- // ============================================================================
171
-
172
- /**
173
- * Decompose Multi* and GeometryCollection into simple geometries.
174
- *
175
- * @param {Geometry} geom
176
- * @returns {SimpleGeometry[]}
177
- */
178
- function decompose(geom) {
179
- switch (geom.type) {
180
- case 'MultiPoint':
181
- return geom.coordinates.map(c => ({ type: 'Point', coordinates: c }))
182
- case 'MultiLineString':
183
- return geom.coordinates.map(c => ({ type: 'LineString', coordinates: c }))
184
- case 'MultiPolygon':
185
- return geom.coordinates.map(c => ({ type: 'Polygon', coordinates: c }))
186
- case 'GeometryCollection':
187
- return geom.geometries.flatMap(decompose)
188
- default:
189
- return [geom]
190
- }
191
- }
192
-
193
- // ============================================================================
194
- // ST_Contains
195
- // ============================================================================
196
-
197
183
  /**
198
184
  * @param {SimpleGeometry[]} a
199
185
  * @param {SimpleGeometry[]} b
@@ -204,10 +190,6 @@ function stContains(a, b) {
204
190
  return b.every(pb => a.some(pa => pairContainment(pa, pb) !== 'OUTSIDE'))
205
191
  }
206
192
 
207
- // ============================================================================
208
- // ST_ContainsProperly
209
- // ============================================================================
210
-
211
193
  /**
212
194
  * @param {SimpleGeometry[]} a
213
195
  * @param {SimpleGeometry[]} b
@@ -218,10 +200,6 @@ function stContainsProperly(a, b) {
218
200
  return b.every(pb => a.some(pa => pairContainment(pa, pb) === 'INSIDE'))
219
201
  }
220
202
 
221
- // ============================================================================
222
- // ST_Touches
223
- // ============================================================================
224
-
225
203
  /**
226
204
  * @param {SimpleGeometry[]} a
227
205
  * @param {SimpleGeometry[]} b
@@ -239,10 +217,6 @@ function stTouches(a, b) {
239
217
  return intersects
240
218
  }
241
219
 
242
- // ============================================================================
243
- // ST_Overlaps
244
- // ============================================================================
245
-
246
220
  /**
247
221
  * @param {SimpleGeometry[]} a
248
222
  * @param {SimpleGeometry[]} b
@@ -281,10 +255,6 @@ function geometryDimension(parts) {
281
255
  return max
282
256
  }
283
257
 
284
- // ============================================================================
285
- // ST_Equals
286
- // ============================================================================
287
-
288
258
  /**
289
259
  * @param {SimpleGeometry[]} a
290
260
  * @param {SimpleGeometry[]} b
@@ -310,10 +280,6 @@ function stEquals(a, b) {
310
280
  return true
311
281
  }
312
282
 
313
- // ============================================================================
314
- // ST_Crosses
315
- // ============================================================================
316
-
317
283
  /**
318
284
  * @param {SimpleGeometry[]} a
319
285
  * @param {SimpleGeometry[]} b