squirreling 0.12.21 → 0.12.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.21",
3
+ "version": "0.12.22",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -3,6 +3,7 @@ import { evaluateExpr } from '../expression/evaluate.js'
3
3
  import { executePlan, selectColumnNames } from './execute.js'
4
4
  import { sortEntriesByTerms } from './sort.js'
5
5
  import { keyify } from './utils.js'
6
+ import { yieldToEventLoop } from './yield.js'
6
7
 
7
8
  /**
8
9
  * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, QueryResults, SelectColumn, SqlPrimitive } from '../types.js'
@@ -89,29 +90,45 @@ export function executeHashAggregate(plan, context) {
89
90
  let collectCount = 0
90
91
  for await (const row of child.rows()) {
91
92
  if (++collectCount % YIELD_INTERVAL === 0) {
92
- await new Promise(resolve => setTimeout(resolve, 0))
93
+ await yieldToEventLoop()
93
94
  if (context.signal?.aborted) return
94
95
  }
95
96
  allRows.push(row)
96
97
  }
97
98
 
98
- // Group rows by GROUP BY keys
99
+ // Group rows by GROUP BY keys.
100
+ // Each chunk dispatches all per-row key evaluations in parallel so
101
+ // async cells (e.g. lazy parquet decode) overlap; the await is at the
102
+ // chunk boundary. Synchronous cells stay cheap because we skip the
103
+ // inner Promise.all wrapper when there's a single GROUP BY expression.
99
104
  /** @type {Map<any, AsyncRow[]>} */
100
105
  const groups = new Map()
106
+ const { groupBy } = plan
107
+ const singleKey = groupBy.length === 1
108
+ const singleExpr = singleKey ? groupBy[0] : null
101
109
 
102
110
  for (let chunkStart = 0; chunkStart < allRows.length; chunkStart += YIELD_INTERVAL) {
103
111
  if (chunkStart > 0) {
104
- await new Promise(resolve => setTimeout(resolve, 0))
112
+ await yieldToEventLoop()
105
113
  if (context.signal?.aborted) return
106
114
  }
107
115
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, allRows.length)
108
- const chunkKeys = await Promise.all(
109
- allRows.slice(chunkStart, chunkEnd).map(row =>
110
- Promise.all(plan.groupBy.map(expr => evaluateExpr({ node: expr, row, context })))
111
- )
112
- )
113
- for (let j = 0; j < chunkKeys.length; j++) {
114
- const key = keyify(...chunkKeys[j])
116
+ const chunkLen = chunkEnd - chunkStart
117
+ /** @type {Promise<any>[]} */
118
+ const pending = new Array(chunkLen)
119
+ if (singleKey) {
120
+ for (let j = 0; j < chunkLen; j++) {
121
+ pending[j] = evaluateExpr({ node: singleExpr, row: allRows[chunkStart + j], context })
122
+ }
123
+ } else {
124
+ for (let j = 0; j < chunkLen; j++) {
125
+ const row = allRows[chunkStart + j]
126
+ pending[j] = Promise.all(groupBy.map(expr => evaluateExpr({ node: expr, row, context })))
127
+ }
128
+ }
129
+ const chunkKeys = await Promise.all(pending)
130
+ for (let j = 0; j < chunkLen; j++) {
131
+ const key = singleKey ? keyify(chunkKeys[j]) : keyify(...chunkKeys[j])
115
132
  const row = allRows[chunkStart + j]
116
133
  let group = groups.get(key)
117
134
  if (!group) {
@@ -189,7 +206,7 @@ export function executeScalarAggregate(plan, context) {
189
206
  let collectCount = 0
190
207
  for await (const row of child.rows()) {
191
208
  if (++collectCount % YIELD_INTERVAL === 0) {
192
- await new Promise(resolve => setTimeout(resolve, 0))
209
+ await yieldToEventLoop()
193
210
  if (context.signal?.aborted) return
194
211
  }
195
212
  group.push(row)
@@ -10,6 +10,7 @@ import { executeHashJoin, executeNestedLoopJoin, executePositionalJoin } from '.
10
10
  import { executeSort } from './sort.js'
11
11
  import { addBounds, minBounds, stableRowKey } from './utils.js'
12
12
  import { executeWindow } from './window.js'
13
+ import { yieldToEventLoop } from './yield.js'
13
14
 
14
15
  /**
15
16
  * @import { AsyncCells, AsyncDataSource, AsyncRow, DerivedColumn, ExecuteContext, ExecuteSqlOptions, ExprNode, IdentifierNode, QueryResults, SelectColumn, SqlPrimitive, Statement } from '../types.js'
@@ -417,7 +418,7 @@ async function* filterRows(rows, condition, context, limit) {
417
418
  for await (const row of rows) {
418
419
  if (context.signal?.aborted) return
419
420
  if (++innerCount % YIELD_INTERVAL === 0) {
420
- await new Promise(resolve => setTimeout(resolve, 0))
421
+ await yieldToEventLoop()
421
422
  if (context.signal?.aborted) return
422
423
  }
423
424
  rowIndex++
@@ -463,7 +464,7 @@ async function* limitRows(rows, limit = Infinity, offset = 0, signal) {
463
464
  for await (const row of rows) {
464
465
  if (signal?.aborted) return
465
466
  if (++innerCount % YIELD_INTERVAL === 0) {
466
- await new Promise(resolve => setTimeout(resolve, 0))
467
+ await yieldToEventLoop()
467
468
  if (signal?.aborted) return
468
469
  }
469
470
  if (skipped < offset) {
@@ -518,7 +519,7 @@ function executeProject(plan, context) {
518
519
  for await (const row of child.rows()) {
519
520
  if (context.signal?.aborted) return
520
521
  if (++innerCount % YIELD_INTERVAL === 0) {
521
- await new Promise(resolve => setTimeout(resolve, 0))
522
+ await yieldToEventLoop()
522
523
  if (context.signal?.aborted) return
523
524
  }
524
525
  rowIndex++
@@ -611,7 +612,7 @@ function executeDistinct(plan, context) {
611
612
  for await (const row of child.rows()) {
612
613
  if (signal?.aborted) return
613
614
  if (++innerCount % YIELD_INTERVAL === 0) {
614
- await new Promise(resolve => setTimeout(resolve, 0))
615
+ await yieldToEventLoop()
615
616
  if (signal?.aborted) return
616
617
  }
617
618
  buffer.push(row)
@@ -698,7 +699,7 @@ function executeSetOperation(plan, context) {
698
699
  for await (const row of left.rows()) {
699
700
  if (signal?.aborted) return
700
701
  if (++count % YIELD_INTERVAL === 0) {
701
- await new Promise(resolve => setTimeout(resolve, 0))
702
+ await yieldToEventLoop()
702
703
  if (signal?.aborted) return
703
704
  }
704
705
  const key = await stableRowKey(row)
@@ -710,7 +711,7 @@ function executeSetOperation(plan, context) {
710
711
  for await (const row of right.rows()) {
711
712
  if (signal?.aborted) return
712
713
  if (++count % YIELD_INTERVAL === 0) {
713
- await new Promise(resolve => setTimeout(resolve, 0))
714
+ await yieldToEventLoop()
714
715
  if (signal?.aborted) return
715
716
  }
716
717
  const key = await stableRowKey(row)
@@ -736,7 +737,7 @@ function executeSetOperation(plan, context) {
736
737
  for await (const row of right.rows()) {
737
738
  if (signal?.aborted) return
738
739
  if (++tick % YIELD_INTERVAL === 0) {
739
- await new Promise(resolve => setTimeout(resolve, 0))
740
+ await yieldToEventLoop()
740
741
  if (signal?.aborted) return
741
742
  }
742
743
  const key = await stableRowKey(row)
@@ -748,7 +749,7 @@ function executeSetOperation(plan, context) {
748
749
  for await (const row of left.rows()) {
749
750
  if (signal?.aborted) return
750
751
  if (++tick % YIELD_INTERVAL === 0) {
751
- await new Promise(resolve => setTimeout(resolve, 0))
752
+ await yieldToEventLoop()
752
753
  if (signal?.aborted) return
753
754
  }
754
755
  const key = await stableRowKey(row)
@@ -764,7 +765,7 @@ function executeSetOperation(plan, context) {
764
765
  for await (const row of left.rows()) {
765
766
  if (signal?.aborted) return
766
767
  if (++tick % YIELD_INTERVAL === 0) {
767
- await new Promise(resolve => setTimeout(resolve, 0))
768
+ await yieldToEventLoop()
768
769
  if (signal?.aborted) return
769
770
  }
770
771
  const key = await stableRowKey(row)
@@ -791,7 +792,7 @@ function executeSetOperation(plan, context) {
791
792
  for await (const row of right.rows()) {
792
793
  if (signal?.aborted) return
793
794
  if (++tick % YIELD_INTERVAL === 0) {
794
- await new Promise(resolve => setTimeout(resolve, 0))
795
+ await yieldToEventLoop()
795
796
  if (signal?.aborted) return
796
797
  }
797
798
  const key = await stableRowKey(row)
@@ -803,7 +804,7 @@ function executeSetOperation(plan, context) {
803
804
  for await (const row of left.rows()) {
804
805
  if (signal?.aborted) return
805
806
  if (++tick % YIELD_INTERVAL === 0) {
806
- await new Promise(resolve => setTimeout(resolve, 0))
807
+ await yieldToEventLoop()
807
808
  if (signal?.aborted) return
808
809
  }
809
810
  const key = await stableRowKey(row)
@@ -820,7 +821,7 @@ function executeSetOperation(plan, context) {
820
821
  for await (const row of left.rows()) {
821
822
  if (signal?.aborted) return
822
823
  if (++tick % YIELD_INTERVAL === 0) {
823
- await new Promise(resolve => setTimeout(resolve, 0))
824
+ await yieldToEventLoop()
824
825
  if (signal?.aborted) return
825
826
  }
826
827
  const key = await stableRowKey(row)
@@ -1,6 +1,7 @@
1
1
  import { evaluateExpr } from '../expression/evaluate.js'
2
2
  import { keyify, maxBounds } from './utils.js'
3
3
  import { executePlan } from './execute.js'
4
+ import { yieldToEventLoop } from './yield.js'
4
5
 
5
6
  /**
6
7
  * @import { AsyncCells, AsyncRow, ExecuteContext, QueryResults } from '../types.js'
@@ -56,7 +57,7 @@ export function executeNestedLoopJoin(plan, context) {
56
57
 
57
58
  for (const rightRow of rightRows) {
58
59
  if (++innerCount % YIELD_INTERVAL === 0) {
59
- await new Promise(resolve => setTimeout(resolve, 0))
60
+ await yieldToEventLoop()
60
61
  if (context.signal?.aborted) return
61
62
  }
62
63
  const tempMerged = mergeRows(leftRow, rightRow, leftTable, rightTable)
@@ -263,7 +264,7 @@ export function executeHashJoin(plan, context) {
263
264
  if (candidates?.length) {
264
265
  for (const rightRow of candidates) {
265
266
  if (++innerCount % YIELD_INTERVAL === 0) {
266
- await new Promise(resolve => setTimeout(resolve, 0))
267
+ await yieldToEventLoop()
267
268
  if (context.signal?.aborted) return
268
269
  }
269
270
  const merged = mergeRows(leftRow, rightRow, leftTable, rightTable)
@@ -1,6 +1,7 @@
1
1
  import { evaluateExpr } from '../expression/evaluate.js'
2
2
  import { executePlan } from './execute.js'
3
3
  import { compareForTerm, keyify } from './utils.js'
4
+ import { yieldToEventLoop } from './yield.js'
4
5
 
5
6
  /**
6
7
  * @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
@@ -40,7 +41,7 @@ export function executeWindow(plan, context) {
40
41
  let i = 0
41
42
  for await (const row of child.rows()) {
42
43
  if (++i % YIELD_INTERVAL === 0) {
43
- await new Promise(resolve => setTimeout(resolve, 0))
44
+ await yieldToEventLoop()
44
45
  if (context.signal?.aborted) return
45
46
  }
46
47
  const cells = { ...row.cells }
@@ -67,7 +68,7 @@ export function executeWindow(plan, context) {
67
68
  let collectCount = 0
68
69
  for await (const row of child.rows()) {
69
70
  if (++collectCount % YIELD_INTERVAL === 0) {
70
- await new Promise(resolve => setTimeout(resolve, 0))
71
+ await yieldToEventLoop()
71
72
  if (context.signal?.aborted) return
72
73
  }
73
74
  rows.push(row)
@@ -86,7 +87,7 @@ export function executeWindow(plan, context) {
86
87
  let emitCount = 0
87
88
  for (let i = 0; i < rows.length; i++) {
88
89
  if (++emitCount % YIELD_INTERVAL === 0) {
89
- await new Promise(resolve => setTimeout(resolve, 0))
90
+ await yieldToEventLoop()
90
91
  if (context.signal?.aborted) return
91
92
  }
92
93
  const row = rows[i]
@@ -120,7 +121,7 @@ async function computeWindow(spec, rows, output, context) {
120
121
  const partitions = new Map()
121
122
  for (let chunkStart = 0; chunkStart < rows.length; chunkStart += YIELD_INTERVAL) {
122
123
  if (chunkStart > 0) {
123
- await new Promise(resolve => setTimeout(resolve, 0))
124
+ await yieldToEventLoop()
124
125
  if (context.signal?.aborted) return
125
126
  }
126
127
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, rows.length)
@@ -151,7 +152,7 @@ async function computeWindow(spec, rows, output, context) {
151
152
  const entries = new Array(bucket.length)
152
153
  for (let chunkStart = 0; chunkStart < bucket.length; chunkStart += YIELD_INTERVAL) {
153
154
  if (chunkStart > 0) {
154
- await new Promise(resolve => setTimeout(resolve, 0))
155
+ await yieldToEventLoop()
155
156
  if (context.signal?.aborted) return
156
157
  }
157
158
  const chunkEnd = Math.min(chunkStart + YIELD_INTERVAL, bucket.length)
@@ -203,7 +204,7 @@ async function applyWindowFunction(spec, ordered, rows, output, context) {
203
204
  let tick = 0
204
205
  for (let k = 0; k < ordered.length; k++) {
205
206
  if (++tick % YIELD_INTERVAL === 0) {
206
- await new Promise(resolve => setTimeout(resolve, 0))
207
+ await yieldToEventLoop()
207
208
  if (context.signal?.aborted) return
208
209
  }
209
210
  const idx = ordered[k]
@@ -0,0 +1,30 @@
1
+ // Yield to the event loop so that timer-based aborts can fire.
2
+ //
3
+ // In Node, setTimeout(fn, 0) is clamped to a 1ms minimum, so a tight loop
4
+ // that yields every few thousand iterations can spend hundreds of ms in
5
+ // scheduling overhead alone. setImmediate (Node) and MessageChannel
6
+ // (browsers) provide the same macrotask boundary at a fraction of the cost.
7
+ //
8
+ // We need a macrotask boundary (not just a microtask) because the abort
9
+ // timer itself is a macrotask; microtasks alone cannot let it fire.
10
+
11
/**
 * Yield control to the host event loop across a macrotask boundary.
 *
 * Each call returns a promise that resolves with no value once the host has
 * run the scheduled task. The scheduling primitive is selected a single
 * time, when this module is evaluated, in order of preference:
 *
 *   1. `setImmediate`   - used whenever the global exists
 *   2. `MessageChannel` - one shared channel; each yield posts one message
 *   3. `setTimeout(0)`  - last-resort fallback
 *
 * The `setImmediate` reference is captured in a local binding at selection
 * time. Looking the global up on every call would let anything that swaps
 * it out later (fake timers in a test runner, a late polyfill) turn every
 * subsequent yield into a promise that never settles.
 *
 * @type {() => Promise<void>}
 */
export const yieldToEventLoop = (() => {
  if (typeof setImmediate === 'function') {
    // Pin the implementation that exists right now; see the note above.
    const scheduleImmediate = setImmediate
    return () => new Promise(resolve => scheduleImmediate(resolve))
  }
  if (typeof MessageChannel !== 'undefined') {
    const channel = new MessageChannel()
    // Resolvers wait here in call order. Every yield posts exactly one
    // message and every delivered message releases exactly one resolver,
    // so concurrent yields are resolved first-in, first-out.
    /** @type {Array<() => void>} */
    const queue = []
    channel.port1.onmessage = () => {
      const resolve = queue.shift()
      if (resolve) resolve()
    }
    return () => new Promise(resolve => {
      queue.push(resolve)
      // The payload is irrelevant; the message only serves as a wake-up.
      channel.port2.postMessage(0)
    })
  }
  // Neither faster primitive is available: fall back to a zero-delay timer.
  return () => new Promise(resolve => setTimeout(resolve, 0))
})()
@@ -1,5 +1,6 @@
1
1
  import { executeStatement } from '../execute/execute.js'
2
2
  import { isPlainObject, keyify, sqlEquals, stringify } from '../execute/utils.js'
3
+ import { yieldToEventLoop } from '../execute/yield.js'
3
4
  import { ArgValueError, ExecutionError } from '../validation/executionErrors.js'
4
5
  import { isAggregateFunc, isMathFunc, isRegexpFunc, isSpatialFunc, isStringFunc } from '../validation/functions.js'
5
6
  import { UnknownFunctionError } from '../validation/parseErrors.js'
@@ -31,16 +32,21 @@ const YIELD_INTERVAL = 4000
31
32
  async function evaluateAll(node, rows, context) {
32
33
  /** @type {SqlPrimitive[]} */
33
34
  const results = new Array(rows.length)
35
+ /** @type {Promise<SqlPrimitive>[]} */
36
+ const pending = new Array(Math.min(YIELD_INTERVAL, rows.length))
34
37
  for (let i = 0; i < rows.length; i += YIELD_INTERVAL) {
35
38
  if (i > 0) {
36
- await new Promise(resolve => setTimeout(resolve, 0))
39
+ await yieldToEventLoop()
37
40
  context.signal?.throwIfAborted()
38
41
  }
39
42
  const end = Math.min(i + YIELD_INTERVAL, rows.length)
40
- const chunk = await Promise.all(rows.slice(i, end).map(row =>
41
- evaluateExpr({ node, row, context })
42
- ))
43
- for (let j = 0; j < chunk.length; j++) results[i + j] = chunk[j]
43
+ const chunkLen = end - i
44
+ pending.length = chunkLen
45
+ for (let j = 0; j < chunkLen; j++) {
46
+ pending[j] = evaluateExpr({ node, row: rows[i + j], context })
47
+ }
48
+ const chunk = await Promise.all(pending)
49
+ for (let j = 0; j < chunkLen; j++) results[i + j] = chunk[j]
44
50
  }
45
51
  return results
46
52
  }
@@ -703,7 +709,7 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
703
709
  let innerCount = 0
704
710
  for await (const resRow of subResult.rows()) {
705
711
  if (++innerCount % YIELD_INTERVAL === 0) {
706
- await new Promise(resolve => setTimeout(resolve, 0))
712
+ await yieldToEventLoop()
707
713
  context.signal?.throwIfAborted()
708
714
  }
709
715
  const value = await resRow.cells[resRow.columns[0]]()