squirreling 0.12.9 → 0.12.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "squirreling",
3
- "version": "0.12.9",
3
+ "version": "0.12.10",
4
4
  "description": "Squirreling Async SQL Engine",
5
5
  "author": "Hyperparam",
6
6
  "homepage": "https://hyperparam.app",
@@ -1,6 +1,7 @@
1
1
  import { derivedAlias } from '../expression/alias.js'
2
2
  import { evaluateExpr } from '../expression/evaluate.js'
3
3
  import { executePlan, selectColumnNames } from './execute.js'
4
+ import { sortEntriesByTerms } from './sort.js'
4
5
  import { keyify } from './utils.js'
5
6
 
6
7
  /**
@@ -50,6 +51,22 @@ function projectAggregateColumns(selectColumns, group, context) {
50
51
  return { columns, cells }
51
52
  }
52
53
 
54
+ /**
55
+ * Builds the row visible to post-aggregation expressions such as HAVING and
56
+ * grouped ORDER BY: source group columns plus aggregate output aliases.
57
+ *
58
+ * @param {AsyncRow[]} group
59
+ * @param {AsyncRow} aggregateRow
60
+ * @returns {AsyncRow}
61
+ */
62
+ function aggregateContextRow(group, aggregateRow) {
63
+ const baseRow = group[0] ?? { columns: [], cells: {} }
64
+ return {
65
+ columns: [...baseRow.columns, ...aggregateRow.columns],
66
+ cells: { ...baseRow.cells, ...aggregateRow.cells },
67
+ }
68
+ }
69
+
53
70
  /**
54
71
  * Executes a hash aggregate operation (GROUP BY)
55
72
  *
@@ -85,27 +102,42 @@ export function executeHashAggregate(plan, context) {
85
102
  group.push(row)
86
103
  }
87
104
 
88
- // Yield one row per group
105
+ /** @type {{ row: AsyncRow, group: AsyncRow[], contextRow: AsyncRow }[]} */
106
+ const aggregateRows = []
107
+
89
108
  for (const group of groups.values()) {
90
109
  const asyncRow = projectAggregateColumns(plan.columns, group, context)
110
+ const contextRow = aggregateContextRow(group, asyncRow)
91
111
 
92
112
  // Apply HAVING filter
93
113
  if (plan.having) {
94
- /** @type {AsyncRow} */
95
- const havingRow = {
96
- columns: [...group[0].columns, ...asyncRow.columns],
97
- cells: { ...group[0].cells, ...asyncRow.cells },
98
- }
99
114
  const passes = await evaluateExpr({
100
115
  node: plan.having,
101
- row: havingRow,
116
+ row: contextRow,
102
117
  rows: group,
103
118
  context,
104
119
  })
105
120
  if (!passes) continue
106
121
  }
107
122
 
108
- yield asyncRow
123
+ aggregateRows.push({ row: asyncRow, group, contextRow })
124
+ }
125
+
126
+ if (plan.orderBy?.length) {
127
+ const sortedRows = await sortEntriesByTerms({
128
+ entries: aggregateRows.map((aggregateRow, idx) => ({
129
+ row: aggregateRow.contextRow,
130
+ rows: aggregateRow.group,
131
+ idx,
132
+ })),
133
+ orderBy: plan.orderBy,
134
+ context,
135
+ })
136
+ aggregateRows.splice(0, aggregateRows.length, ...sortedRows.map(({ idx }) => aggregateRows[idx]))
137
+ }
138
+
139
+ for (const { row } of aggregateRows) {
140
+ yield row
109
141
  }
110
142
  },
111
143
  }
@@ -4,12 +4,117 @@ import { executePlan } from './execute.js'
4
4
  import { compareForTerm } from './utils.js'
5
5
 
6
6
  /**
7
- * @import { AsyncRow, ExecuteContext, QueryResults, SqlPrimitive } from '../types.js'
7
+ * @import { AsyncRow, ExecuteContext, OrderByItem, QueryResults, SqlPrimitive } from '../types.js'
8
8
  * @import { SortNode } from '../plan/types.js'
9
9
  */
10
10
 
11
11
  const MAX_CHUNK = 256
12
12
 
13
+ /**
14
+ * @typedef {{
15
+ * row: AsyncRow,
16
+ * rows?: AsyncRow[],
17
+ * }} SortEntry
18
+ */
19
+
20
+ /**
21
+ * Sorts rows by ORDER BY terms while evaluating async sort keys in concurrent
22
+ * chunks and delaying later terms until earlier terms tie.
23
+ *
24
+ * @template {SortEntry} T
25
+ * @param {{
26
+ * entries: T[],
27
+ * orderBy: OrderByItem[],
28
+ * context: ExecuteContext,
29
+ * cacheValues?: boolean,
30
+ * }} options
31
+ * @returns {Promise<T[]>}
32
+ */
33
+ export async function sortEntriesByTerms({ entries, orderBy, context, cacheValues = false }) {
34
+ if (entries.length === 0) return []
35
+
36
+ /** @type {(SqlPrimitive | undefined)[][]} */
37
+ const evaluatedValues = entries.map(() => Array(orderBy.length))
38
+
39
+ /** @type {number[][]} */
40
+ let groups = [entries.map((_, i) => i)]
41
+
42
+ for (let orderByIdx = 0; orderByIdx < orderBy.length; orderByIdx++) {
43
+ const term = orderBy[orderByIdx]
44
+ /** @type {number[][]} */
45
+ const nextGroups = []
46
+
47
+ for (const group of groups) {
48
+ if (group.length <= 1) {
49
+ nextGroups.push(group)
50
+ continue
51
+ }
52
+
53
+ const alias = derivedAlias(term.expr)
54
+ /** @type {number[]} */
55
+ const missing = []
56
+ for (const idx of group) {
57
+ if (evaluatedValues[idx][orderByIdx] === undefined) missing.push(idx)
58
+ }
59
+ let chunkSize = 1
60
+ let start = 0
61
+ while (start < missing.length) {
62
+ if (context.signal?.aborted) return []
63
+ const chunk = missing.slice(start, start + chunkSize)
64
+ const values = await Promise.all(chunk.map(idx =>
65
+ evaluateExpr({
66
+ node: term.expr,
67
+ row: entries[idx].row,
68
+ rows: entries[idx].rows,
69
+ context,
70
+ })
71
+ ))
72
+ for (let i = 0; i < chunk.length; i++) {
73
+ const idx = chunk[i]
74
+ const value = values[i]
75
+ evaluatedValues[idx][orderByIdx] = value
76
+ if (cacheValues && !(alias in entries[idx].row.cells)) {
77
+ entries[idx].row.cells[alias] = () => Promise.resolve(value)
78
+ }
79
+ }
80
+ start += chunk.length
81
+ chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
82
+ }
83
+
84
+ group.sort((aIdx, bIdx) => {
85
+ const av = evaluatedValues[aIdx][orderByIdx]
86
+ const bv = evaluatedValues[bIdx][orderByIdx]
87
+ return compareForTerm(av, bv, term)
88
+ })
89
+
90
+ if (orderByIdx < orderBy.length - 1) {
91
+ /** @type {number[]} */
92
+ let currentSubGroup = [group[0]]
93
+ for (let i = 1; i < group.length; i++) {
94
+ const prevIdx = group[i - 1]
95
+ const currIdx = group[i]
96
+ const prevVal = evaluatedValues[prevIdx][orderByIdx]
97
+ const currVal = evaluatedValues[currIdx][orderByIdx]
98
+
99
+ if (compareForTerm(prevVal, currVal, term) === 0) {
100
+ currentSubGroup.push(currIdx)
101
+ } else {
102
+ nextGroups.push(currentSubGroup)
103
+ currentSubGroup = [currIdx]
104
+ }
105
+ }
106
+ nextGroups.push(currentSubGroup)
107
+ } else {
108
+ nextGroups.push(group)
109
+ }
110
+ }
111
+
112
+ groups = nextGroups
113
+ }
114
+
115
+ return groups.flat().map(idx => entries[idx])
116
+ }
117
+
13
118
  /**
14
119
  * Executes a sort operation (ORDER BY)
15
120
  *
@@ -32,92 +137,16 @@ export function executeSort(plan, context) {
32
137
  rows.push(row)
33
138
  }
34
139
 
35
- if (rows.length === 0) return
36
-
37
- // Multi-pass lazy sorting
38
- /** @type {(SqlPrimitive | undefined)[][]} */
39
- const evaluatedValues = rows.map(() => Array(plan.orderBy.length))
40
-
41
- /** @type {number[][]} */
42
- let groups = [rows.map((_, i) => i)]
43
-
44
- for (let orderByIdx = 0; orderByIdx < plan.orderBy.length; orderByIdx++) {
45
- const term = plan.orderBy[orderByIdx]
46
- /** @type {number[][]} */
47
- const nextGroups = []
48
-
49
- for (const group of groups) {
50
- if (group.length <= 1) {
51
- nextGroups.push(group)
52
- continue
53
- }
54
-
55
- // Evaluate this column for all rows in the group, in parallel
56
- // chunks that double up to MAX_CHUNK so a slow UDF doesn't serialize.
57
- // Cache each value back into the row so downstream projection can
58
- // reuse it instead of re-invoking the expression.
59
- const alias = derivedAlias(term.expr)
60
- /** @type {number[]} */
61
- const missing = []
62
- for (const idx of group) {
63
- if (evaluatedValues[idx][orderByIdx] === undefined) missing.push(idx)
64
- }
65
- let chunkSize = 1
66
- let start = 0
67
- while (start < missing.length) {
68
- if (context.signal?.aborted) return
69
- const chunk = missing.slice(start, start + chunkSize)
70
- const values = await Promise.all(chunk.map(idx =>
71
- evaluateExpr({ node: term.expr, row: rows[idx], context })
72
- ))
73
- for (let i = 0; i < chunk.length; i++) {
74
- const idx = chunk[i]
75
- const value = values[i]
76
- evaluatedValues[idx][orderByIdx] = value
77
- if (!(alias in rows[idx].cells)) {
78
- rows[idx].cells[alias] = () => Promise.resolve(value)
79
- }
80
- }
81
- start += chunk.length
82
- chunkSize = Math.min(chunkSize * 2, MAX_CHUNK)
83
- }
84
-
85
- // Sort the group by this column
86
- group.sort((aIdx, bIdx) => {
87
- const av = evaluatedValues[aIdx][orderByIdx]
88
- const bv = evaluatedValues[bIdx][orderByIdx]
89
- return compareForTerm(av, bv, term)
90
- })
91
-
92
- // Split into sub-groups based on ties
93
- if (orderByIdx < plan.orderBy.length - 1) {
94
- /** @type {number[]} */
95
- let currentSubGroup = [group[0]]
96
- for (let i = 1; i < group.length; i++) {
97
- const prevIdx = group[i - 1]
98
- const currIdx = group[i]
99
- const prevVal = evaluatedValues[prevIdx][orderByIdx]
100
- const currVal = evaluatedValues[currIdx][orderByIdx]
101
-
102
- if (compareForTerm(prevVal, currVal, term) === 0) {
103
- currentSubGroup.push(currIdx)
104
- } else {
105
- nextGroups.push(currentSubGroup)
106
- currentSubGroup = [currIdx]
107
- }
108
- }
109
- nextGroups.push(currentSubGroup)
110
- } else {
111
- nextGroups.push(group)
112
- }
113
- }
114
-
115
- groups = nextGroups
116
- }
140
+ const sortedRows = await sortEntriesByTerms({
141
+ entries: rows.map(row => ({ row })),
142
+ orderBy: plan.orderBy,
143
+ context,
144
+ cacheValues: true,
145
+ })
117
146
 
118
147
  // Yield sorted rows
119
- for (const idx of groups.flat()) {
120
- yield rows[idx]
148
+ for (const { row } of sortedRows) {
149
+ yield row
121
150
  }
122
151
  },
123
152
  }
@@ -121,6 +121,19 @@ export function maxBounds(a, b) {
121
121
  return a ?? b
122
122
  }
123
123
 
124
+ /**
125
+ * Returns true for plain object SqlPrimitive values, excluding null, arrays, and Dates.
126
+ *
127
+ * @param {SqlPrimitive} value
128
+ * @returns {value is Record<string, SqlPrimitive>}
129
+ */
130
+ export function isPlainObject(value) {
131
+ return value != null
132
+ && typeof value === 'object'
133
+ && !Array.isArray(value)
134
+ && !(value instanceof Date)
135
+ }
136
+
124
137
  /**
125
138
  * @param {SqlPrimitive} value
126
139
  * @returns {string}
@@ -1,5 +1,5 @@
1
1
  import { executeStatement } from '../execute/execute.js'
2
- import { keyify, stringify } from '../execute/utils.js'
2
+ import { isPlainObject, keyify, stringify } from '../execute/utils.js'
3
3
  import { ArgValueError, ExecutionError } from '../validation/executionErrors.js'
4
4
  import { isAggregateFunc, isMathFunc, isRegexpFunc, isSpatialFunc, isStringFunc } from '../validation/functions.js'
5
5
  import { UnknownFunctionError } from '../validation/parseErrors.js'
@@ -39,6 +39,14 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
39
39
  if (qualified in row.cells) {
40
40
  return row.cells[qualified]()
41
41
  }
42
+ const prefix = node.prefix + '.'
43
+ const prefixedColumns = row.columns.filter(col => col.startsWith(prefix))
44
+ if (prefixedColumns.length === 1) {
45
+ const value = await row.cells[prefixedColumns[0]]()
46
+ if (isPlainObject(value) && Object.prototype.hasOwnProperty.call(value, node.name)) {
47
+ return value[node.name]
48
+ }
49
+ }
42
50
  // Check outer row for correlated subquery references
43
51
  if (context.outerRow && context.outerAliases?.has(node.prefix) && node.name in context.outerRow.cells) {
44
52
  return context.outerRow.cells[node.name]()
@@ -473,6 +481,23 @@ export async function evaluateExpr({ node, row, rowIndex, rows, context }) {
473
481
  if (funcName === 'ARRAY_LENGTH' || funcName === 'CARDINALITY') {
474
482
  const arr = args[0]
475
483
  if (!Array.isArray(arr)) return null
484
+ if (funcName === 'ARRAY_LENGTH' && args.length === 2) {
485
+ const dim = args[1]
486
+ if (typeof dim !== 'number' && typeof dim !== 'bigint') return null
487
+ const d = Number(dim)
488
+ if (!Number.isInteger(d) || d < 1) return null
489
+ let level = arr
490
+ for (let i = 1; i < d; i++) {
491
+ if (!Array.isArray(level) || level.length === 0) return null
492
+ const first = level[0]
493
+ if (!Array.isArray(first)) return null
494
+ for (const item of level) {
495
+ if (!Array.isArray(item) || item.length !== first.length) return null
496
+ }
497
+ level = first
498
+ }
499
+ return level.length
500
+ }
476
501
  return arr.length
477
502
  }
478
503
 
@@ -157,6 +157,16 @@ export function parsePrimary(state) {
157
157
  if (match(state, 'dot')) {
158
158
  prefix = name
159
159
  name = expect(state, 'identifier').value
160
+ } else if (match(state, 'bracket', '[')) {
161
+ // table['column'] — string subscript is equivalent to dot access
162
+ const fieldTok = current(state)
163
+ if (fieldTok.type !== 'string') {
164
+ throw parseError(state, 'string literal')
165
+ }
166
+ consume(state)
167
+ expect(state, 'bracket', ']')
168
+ prefix = name
169
+ name = fieldTok.value
160
170
  }
161
171
 
162
172
  return {
package/src/plan/plan.js CHANGED
@@ -7,7 +7,7 @@ import { validateNoIdentifiers, validateScan, validateTableRefs } from '../valid
7
7
  import { extractColumns, fromAlias, inferSelectSourceColumns, inferStatementColumns, tableFunctionColumnNames } from './columns.js'
8
8
 
9
9
  /**
10
- * @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement, WindowFunctionNode } from '../types.js'
10
+ * @import { AsyncDataSource, ExprNode, DerivedColumn, IdentifierNode, JoinClause, OrderByItem, PlanSqlOptions, ScanOptions, SelectColumn, SelectStatement, SetOperationStatement, Statement, WindowFunctionNode } from '../types.js'
11
11
  * @import { QueryPlan, WindowSpec } from './types.d.ts'
12
12
  */
13
13
 
@@ -177,6 +177,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
177
177
  }
178
178
  return col
179
179
  })
180
+ const orderBy = resolveOrderByAliases(select.orderBy, aliases)
180
181
 
181
182
  // Validate qualified references in other clauses
182
183
  validateTableRefs(select.where, scopeTables)
@@ -235,7 +236,16 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
235
236
  const groupBy = aliases.size > 0
236
237
  ? select.groupBy.map(expr => resolveAliases(expr, aliases))
237
238
  : select.groupBy
238
- plan = { type: 'HashAggregate', groupBy, columns, having: select.having, child: plan }
239
+ /** @type {QueryPlan} */
240
+ const aggregatePlan = {
241
+ type: 'HashAggregate',
242
+ groupBy,
243
+ columns,
244
+ having: select.having,
245
+ child: plan,
246
+ }
247
+ if (orderBy.length) aggregatePlan.orderBy = orderBy
248
+ plan = aggregatePlan
239
249
  } else if (!select.having && !select.where && plan.type === 'Scan' && isOwnScan && isAllCountStar(select.columns)) {
240
250
  plan = { type: 'Count', table: plan.table, columns: select.columns }
241
251
  } else {
@@ -243,8 +253,8 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
243
253
  }
244
254
 
245
255
  // ORDER BY (after aggregation)
246
- if (select.orderBy.length) {
247
- plan = { type: 'Sort', orderBy: select.orderBy, child: plan }
256
+ if (orderBy.length && !select.groupBy.length) {
257
+ plan = { type: 'Sort', orderBy, child: plan }
248
258
  }
249
259
 
250
260
  // DISTINCT
@@ -267,10 +277,7 @@ function planSelect({ select, ctePlans, cteColumns, tables, parentColumns, outer
267
277
 
268
278
  // ORDER BY (before projection so it can access all columns)
269
279
  // Resolve SELECT aliases in ORDER BY expressions at plan time
270
- if (select.orderBy.length) {
271
- const orderBy = aliases.size > 0
272
- ? select.orderBy.map(term => ({ ...term, expr: resolveAliases(term.expr, aliases) }))
273
- : select.orderBy
280
+ if (orderBy.length) {
274
281
  plan = { type: 'Sort', orderBy, child: plan }
275
282
  }
276
283
 
@@ -469,6 +476,19 @@ function planJoin({ left, joins, leftTable, ctePlans, cteColumns, perTableColumn
469
476
  return plan
470
477
  }
471
478
 
479
+ /**
480
+ * Recursively replaces identifier nodes in ORDER BY terms that match SELECT
481
+ * aliases with their aliased expressions.
482
+ *
483
+ * @param {OrderByItem[]} orderBy
484
+ * @param {Map<string, ExprNode>} aliases
485
+ * @returns {OrderByItem[]}
486
+ */
487
+ function resolveOrderByAliases(orderBy, aliases) {
488
+ if (!aliases.size) return orderBy
489
+ return orderBy.map(term => ({ ...term, expr: resolveAliases(term.expr, aliases) }))
490
+ }
491
+
472
492
  /**
473
493
  * Recursively replaces identifier nodes that match SELECT aliases
474
494
  * with their aliased expressions.
@@ -492,7 +512,8 @@ function resolveAliases(node, aliases) {
492
512
  }
493
513
  if (node.type === 'function') {
494
514
  const args = node.args.map(arg => resolveAliases(arg, aliases))
495
- return { ...node, args }
515
+ if (!node.filter) return { ...node, args }
516
+ return { ...node, args, filter: resolveAliases(node.filter, aliases) }
496
517
  }
497
518
  if (node.type === 'cast') {
498
519
  return { ...node, expr: resolveAliases(node.expr, aliases) }
@@ -67,6 +67,7 @@ export interface HashAggregateNode {
67
67
  type: 'HashAggregate'
68
68
  groupBy: ExprNode[]
69
69
  columns: SelectColumn[]
70
+ orderBy?: OrderByItem[]
70
71
  having?: ExprNode
71
72
  child: QueryPlan
72
73
  }
@@ -180,7 +180,7 @@ export const FUNCTION_SIGNATURES = {
180
180
  ARRAY_AGG: { min: 1, max: 1, signature: 'expression' },
181
181
 
182
182
  // Array functions
183
- ARRAY_LENGTH: { min: 1, max: 1, signature: 'array' },
183
+ ARRAY_LENGTH: { min: 1, max: 2, signature: 'array[, dimension]' },
184
184
  ARRAY_POSITION: { min: 2, max: 2, signature: 'array, element' },
185
185
  ARRAY_SORT: { min: 1, max: 1, signature: 'array' },
186
186
  CARDINALITY: { min: 1, max: 1, signature: 'array' },