@malloydata/malloy 0.0.350 → 0.0.351

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/dialect/databricks/databricks.d.ts +94 -0
  2. package/dist/dialect/databricks/databricks.js +441 -0
  3. package/dist/dialect/databricks/dialect_functions.d.ts +2 -0
  4. package/dist/dialect/databricks/dialect_functions.js +67 -0
  5. package/dist/dialect/databricks/function_overrides.d.ts +2 -0
  6. package/dist/dialect/databricks/function_overrides.js +68 -0
  7. package/dist/dialect/databricks/index.d.ts +1 -0
  8. package/dist/dialect/databricks/index.js +22 -0
  9. package/dist/dialect/dialect.d.ts +24 -1
  10. package/dist/dialect/dialect.js +31 -1
  11. package/dist/dialect/dialect_map.js +2 -0
  12. package/dist/dialect/duckdb/duckdb.d.ts +2 -2
  13. package/dist/dialect/duckdb/duckdb.js +2 -1
  14. package/dist/dialect/index.d.ts +2 -1
  15. package/dist/dialect/index.js +3 -1
  16. package/dist/dialect/mysql/mysql.d.ts +2 -2
  17. package/dist/dialect/mysql/mysql.js +3 -2
  18. package/dist/dialect/postgres/postgres.d.ts +2 -2
  19. package/dist/dialect/postgres/postgres.js +3 -1
  20. package/dist/dialect/snowflake/snowflake.d.ts +2 -2
  21. package/dist/dialect/snowflake/snowflake.js +3 -1
  22. package/dist/dialect/standardsql/standardsql.d.ts +4 -3
  23. package/dist/dialect/standardsql/standardsql.js +6 -1
  24. package/dist/dialect/trino/trino.d.ts +2 -2
  25. package/dist/dialect/trino/trino.js +2 -1
  26. package/dist/index.d.ts +1 -1
  27. package/dist/index.js +3 -2
  28. package/dist/lang/ast/query-elements/query-base.d.ts +1 -1
  29. package/dist/lang/ast/query-elements/query-base.js +2 -2
  30. package/dist/lang/ast/query-elements/query-raw.d.ts +1 -1
  31. package/dist/lang/ast/query-elements/query-raw.js +2 -2
  32. package/dist/lang/ast/query-elements/query-reference.d.ts +1 -1
  33. package/dist/lang/ast/query-elements/query-reference.js +2 -2
  34. package/dist/lang/ast/sql-elements/sql-string.js +1 -1
  35. package/dist/lang/ast/types/query-element.d.ts +1 -1
  36. package/dist/lang/lib/Malloy/MalloyLexer.js +1 -1
  37. package/dist/lang/lib/Malloy/MalloyParser.js +1 -1
  38. package/dist/lang/lib/Malloy/MalloyParserListener.js +1 -1
  39. package/dist/lang/lib/Malloy/MalloyParserVisitor.js +1 -1
  40. package/dist/model/query_query.d.ts +2 -2
  41. package/dist/model/query_query.js +57 -19
  42. package/dist/test/resultMatchers.d.ts +5 -3
  43. package/dist/test/resultMatchers.js +160 -150
  44. package/dist/version.d.ts +1 -1
  45. package/dist/version.js +1 -1
  46. package/femto-config.motly +21 -0
  47. package/package.json +8 -6
@@ -0,0 +1,94 @@
1
+ import type { Sampling, MeasureTimeExpr, RegexMatchExpr, TimeExtractExpr, TypecastExpr, BasicAtomicTypeDef, AtomicTypeDef, ArrayLiteralNode, RecordLiteralNode } from '../../model/malloy_types';
2
+ import type { BooleanTypeSupport, CompiledOrderBy, DialectFieldList, FieldReferenceType, LateralJoinExpression, OrderByClauseType, OrderByRequest, QueryInfo } from '../dialect';
3
+ import { Dialect } from '../dialect';
4
+ import type { DialectFunctionOverloadDef } from '../functions';
5
+ export declare class DatabricksDialect extends Dialect {
6
+ name: string;
7
+ defaultNumberType: string;
8
+ defaultDecimalType: string;
9
+ udfPrefix: string;
10
+ hasFinalStage: boolean;
11
+ stringTypeName: string;
12
+ divisionIsInteger: boolean;
13
+ supportsSumDistinctFunction: boolean;
14
+ unnestWithNumbers: boolean;
15
+ defaultSampling: {
16
+ rows: number;
17
+ };
18
+ supportUnnestArrayAgg: boolean;
19
+ supportsAggDistinct: boolean;
20
+ supportsCTEinCoorelatedSubQueries: boolean;
21
+ supportsSafeCast: boolean;
22
+ dontUnionIndex: boolean;
23
+ supportsQualify: boolean;
24
+ supportsNesting: boolean;
25
+ hasLateralColumnAliasInSelect: boolean;
26
+ cantPartitionWindowFunctionsOnExpressions: boolean;
27
+ experimental: boolean;
28
+ supportsFullJoin: boolean;
29
+ supportsPipelinesInViews: boolean;
30
+ readsNestedData: boolean;
31
+ supportsComplexFilteredSources: boolean;
32
+ supportsArraysInData: boolean;
33
+ compoundObjectInSchema: boolean;
34
+ booleanType: BooleanTypeSupport;
35
+ likeEscape: boolean;
36
+ orderByClause: OrderByClauseType;
37
+ hasTimestamptz: boolean;
38
+ supportsBigIntPrecision: boolean;
39
+ maxIdentifierLength: number;
40
+ malloyTypeToSQLType(malloyType: AtomicTypeDef): string;
41
+ sqlTypeToMalloyType(sqlType: string): BasicAtomicTypeDef;
42
+ quoteTablePath(tablePath: string): string;
43
+ sqlGroupSetTable(groupSetCount: number): string;
44
+ sqlLateralJoinBag(expressions: LateralJoinExpression[]): string;
45
+ sqlOrderBy(orderTerms: string[], obr?: OrderByRequest): string;
46
+ sqlAnyValue(groupSet: number, fieldName: string): string;
47
+ private buildNamedStructExpression;
48
+ sqlAggregateTurtle(groupSet: number, fieldList: DialectFieldList, orderBy: CompiledOrderBy[] | undefined): string;
49
+ private buildArraySortComparator;
50
+ sqlAnyValueTurtle(groupSet: number, fieldList: DialectFieldList): string;
51
+ sqlAnyValueLastTurtle(name: string, groupSet: number, sqlName: string): string;
52
+ sqlCoaleseMeasuresInline(groupSet: number, fieldList: DialectFieldList): string;
53
+ sqlUnnestAlias(source: string, alias: string, _fieldList: DialectFieldList, needDistinctKey: boolean, isArray: boolean, _isInNestedPipeline: boolean): string;
54
+ sqlUnnestPipelineHead(isSingleton: boolean, sourceSQLExpression: string, _fieldList?: DialectFieldList): string;
55
+ sqlSumDistinctHashedKey(sqlDistinctKey: string): string;
56
+ sqlSumDistinct(key: string, value: string, funcName: string): string;
57
+ sqlStringAggDistinct(distinctKey: string, valueSQL: string, separatorSQL: string): string;
58
+ sqlGenerateUUID(): string;
59
+ sqlFieldReference(parentAlias: string, _parentType: FieldReferenceType, childName: string, _childType: string): string;
60
+ sqlCreateFunction(id: string, funcText: string): string;
61
+ sqlCreateFunctionCombineLastStage(lastStageName: string, fieldList: DialectFieldList): string;
62
+ sqlSelectAliasAsStruct(alias: string, fieldList: DialectFieldList): string;
63
+ sqlMaybeQuoteIdentifier(identifier: string): string;
64
+ sqlCreateTableAsSelect(tableName: string, sql: string): string;
65
+ sqlNowExpr(): string;
66
+ sqlConvertToCivilTime(expr: string, timezone: string, _typeDef: AtomicTypeDef): {
67
+ sql: string;
68
+ typeDef: AtomicTypeDef;
69
+ };
70
+ sqlConvertFromCivilTime(expr: string, timezone: string, _destTypeDef: AtomicTypeDef): string;
71
+ sqlTruncate(expr: string, unit: string, _typeDef: AtomicTypeDef, _inCivilTime: boolean, _timezone?: string): string;
72
+ sqlOffsetTime(expr: string, op: '+' | '-', magnitude: string, unit: string, _typeDef: AtomicTypeDef, _inCivilTime: boolean, _timezone?: string): string;
73
+ sqlTimeExtractExpr(qi: QueryInfo, te: TimeExtractExpr): string;
74
+ sqlCast(qi: QueryInfo, cast: TypecastExpr): string;
75
+ sqlRegexpMatch(df: RegexMatchExpr): string;
76
+ sqlDateLiteral(_qi: QueryInfo, literal: string): string;
77
+ sqlTimestampLiteral(qi: QueryInfo, literal: string, timezone: string | undefined): string;
78
+ sqlTimestamptzLiteral(_qi: QueryInfo, _literal: string, _timezone: string): string;
79
+ sqlMeasureTimeExpr(df: MeasureTimeExpr): string;
80
+ sqlSampleTable(tableSQL: string, sample: Sampling | undefined): string;
81
+ sqlLiteralString(literal: string): string;
82
+ sqlLiteralRegexp(literal: string): string;
83
+ getDialectFunctionOverrides(): {
84
+ [name: string]: DialectFunctionOverloadDef[];
85
+ };
86
+ getDialectFunctions(): {
87
+ [name: string]: DialectFunctionOverloadDef[];
88
+ };
89
+ castToString(expression: string): string;
90
+ concat(...values: string[]): string;
91
+ validateTypeName(sqlType: string): boolean;
92
+ sqlLiteralArray(lit: ArrayLiteralNode): string;
93
+ sqlLiteralRecord(lit: RecordLiteralNode): string;
94
+ }
@@ -0,0 +1,441 @@
1
+ "use strict";
2
+ /*
3
+ * Copyright Contributors to the Malloy project
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.DatabricksDialect = void 0;
8
+ const malloy_types_1 = require("../../model/malloy_types");
9
+ const utils_1 = require("../../model/utils");
10
+ const dialect_1 = require("../dialect");
11
+ const functions_1 = require("../functions");
12
+ const dialect_functions_1 = require("./dialect_functions");
13
+ const function_overrides_1 = require("./function_overrides");
14
+ const extractionMap = {
15
+ day_of_week: 'DAYOFWEEK',
16
+ day_of_year: 'DAYOFYEAR',
17
+ };
18
+ const inSeconds = {
19
+ second: 1,
20
+ minute: 60,
21
+ hour: 3600,
22
+ day: 24 * 3600,
23
+ week: 7 * 24 * 3600,
24
+ };
25
+ const databricksToMalloyTypes = {
26
+ 'tinyint': { type: 'number', numberType: 'integer' },
27
+ 'smallint': { type: 'number', numberType: 'integer' },
28
+ 'int': { type: 'number', numberType: 'integer' },
29
+ 'integer': { type: 'number', numberType: 'integer' },
30
+ 'bigint': { type: 'number', numberType: 'bigint' },
31
+ 'float': { type: 'number', numberType: 'float' },
32
+ 'double': { type: 'number', numberType: 'float' },
33
+ 'decimal': { type: 'number', numberType: 'float' },
34
+ 'string': { type: 'string' },
35
+ 'varchar': { type: 'string' },
36
+ 'char': { type: 'string' },
37
+ 'binary': { type: 'string' },
38
+ 'boolean': { type: 'boolean' },
39
+ 'date': { type: 'date' },
40
+ 'timestamp': { type: 'timestamp' },
41
+ };
42
+ class DatabricksDialect extends dialect_1.Dialect {
43
+ constructor() {
44
+ super(...arguments);
45
+ this.name = 'databricks';
46
+ this.defaultNumberType = 'DOUBLE';
47
+ this.defaultDecimalType = 'DECIMAL';
48
+ this.udfPrefix = '__udf';
49
+ this.hasFinalStage = false;
50
+ this.stringTypeName = 'STRING';
51
+ this.divisionIsInteger = false;
52
+ this.supportsSumDistinctFunction = true;
53
+ this.unnestWithNumbers = false;
54
+ this.defaultSampling = { rows: 50000 };
55
+ this.supportUnnestArrayAgg = true;
56
+ this.supportsAggDistinct = false;
57
+ this.supportsCTEinCoorelatedSubQueries = true;
58
+ this.supportsSafeCast = true;
59
+ this.dontUnionIndex = false;
60
+ this.supportsQualify = false;
61
+ this.supportsNesting = true;
62
+ this.hasLateralColumnAliasInSelect = true;
63
+ this.cantPartitionWindowFunctionsOnExpressions = true;
64
+ this.experimental = false;
65
+ this.supportsFullJoin = true;
66
+ this.supportsPipelinesInViews = false;
67
+ this.readsNestedData = false;
68
+ this.supportsComplexFilteredSources = false;
69
+ this.supportsArraysInData = false;
70
+ this.compoundObjectInSchema = false;
71
+ this.booleanType = 'supported';
72
+ this.likeEscape = false;
73
+ this.orderByClause = 'ordinal';
74
+ this.hasTimestamptz = false;
75
+ this.supportsBigIntPrecision = false;
76
+ this.maxIdentifierLength = 255;
77
+ }
78
+ malloyTypeToSQLType(malloyType) {
79
+ switch (malloyType.type) {
80
+ case 'number':
81
+ if (malloyType.numberType === 'integer') {
82
+ return 'INT';
83
+ }
84
+ else if (malloyType.numberType === 'bigint') {
85
+ return 'BIGINT';
86
+ }
87
+ else {
88
+ return 'DOUBLE';
89
+ }
90
+ case 'string':
91
+ return 'STRING';
92
+ case 'boolean':
93
+ return 'BOOLEAN';
94
+ case 'record': {
95
+ const fields = [];
96
+ for (const f of malloyType.fields) {
97
+ if ((0, malloy_types_1.isAtomic)(f)) {
98
+ fields.push(`${this.sqlMaybeQuoteIdentifier(f.name)}: ${this.malloyTypeToSQLType(f)}`);
99
+ }
100
+ }
101
+ return `STRUCT<${fields.join(', ')}>`;
102
+ }
103
+ case 'array': {
104
+ if ((0, malloy_types_1.isRepeatedRecord)(malloyType)) {
105
+ const fields = [];
106
+ for (const f of malloyType.fields) {
107
+ if ((0, malloy_types_1.isAtomic)(f)) {
108
+ fields.push(`${this.sqlMaybeQuoteIdentifier(f.name)}: ${this.malloyTypeToSQLType(f)}`);
109
+ }
110
+ }
111
+ return `ARRAY<STRUCT<${fields.join(', ')}>>`;
112
+ }
113
+ return `ARRAY<${this.malloyTypeToSQLType(malloyType.elementTypeDef)}>`;
114
+ }
115
+ case 'timestamp':
116
+ return 'TIMESTAMP';
117
+ case 'sql native':
118
+ return malloyType.rawType || 'STRING';
119
+ default:
120
+ return malloyType.type.toUpperCase();
121
+ }
122
+ }
123
+ sqlTypeToMalloyType(sqlType) {
124
+ var _a, _b;
125
+ const baseSqlType = (_b = (_a = sqlType.match(/^(\w+)/)) === null || _a === void 0 ? void 0 : _a.at(0)) !== null && _b !== void 0 ? _b : sqlType;
126
+ return (databricksToMalloyTypes[baseSqlType.toLowerCase()] || {
127
+ type: 'sql native',
128
+ rawType: baseSqlType,
129
+ });
130
+ }
131
+ quoteTablePath(tablePath) {
132
+ return tablePath
133
+ .split('.')
134
+ .map(part => (/^[a-zA-Z_]\w*$/.test(part) ? part : `\`${part}\``))
135
+ .join('.');
136
+ }
137
+ sqlGroupSetTable(groupSetCount) {
138
+ return `LATERAL VIEW EXPLODE(SEQUENCE(0, ${groupSetCount})) group_set AS group_set`;
139
+ }
140
+ sqlLateralJoinBag(expressions) {
141
+ // Use LATERAL VIEW INLINE to produce a single-row lateral join with named
142
+ // columns. This must be LATERAL VIEW (not LEFT JOIN LATERAL) because
143
+ // Databricks requires all LATERAL VIEWs to come after regular JOINs,
144
+ // and group_set uses LATERAL VIEW EXPLODE.
145
+ const structArgs = expressions
146
+ .map(e => `'${e.name.replace(/`/g, '')}', ${e.sql}`)
147
+ .join(', ');
148
+ const aliases = expressions.map(e => e.name).join(', ');
149
+ return `LATERAL VIEW INLINE(ARRAY(named_struct(${structArgs}))) __lateral_join_bag AS ${aliases}\n`;
150
+ }
151
+ sqlOrderBy(orderTerms, obr) {
152
+ if (obr === 'analytical' || obr === 'turtle') {
153
+ return `ORDER BY ${orderTerms.join(',')}`;
154
+ }
155
+ return `ORDER BY ${orderTerms.map(t => `${t} NULLS LAST`).join(',')}`;
156
+ }
157
+ sqlAnyValue(groupSet, fieldName) {
158
+ return `FIRST(CASE WHEN group_set=${groupSet} THEN ${fieldName} END) IGNORE NULLS`;
159
+ }
160
+ // Build a named_struct expression that creates struct fields with the
161
+ // correct output names. This avoids CAST(STRUCT(...) AS STRUCT<...>) which
162
+ // can lose nested complex type information (e.g. inner array-of-struct
163
+ // field names) in Databricks.
164
+ buildNamedStructExpression(fieldList) {
165
+ return ('named_struct(' +
166
+ fieldList.map(f => `'${f.rawName}', ${f.sqlExpression}`).join(', ') +
167
+ ')');
168
+ }
169
+ sqlAggregateTurtle(groupSet, fieldList, orderBy) {
170
+ const namedStruct = this.buildNamedStructExpression(fieldList);
171
+ const collectExpr = `COLLECT_LIST(${namedStruct}) FILTER (WHERE group_set=${groupSet})`;
172
+ if (!orderBy || orderBy.length === 0) {
173
+ return collectExpr;
174
+ }
175
+ return `ARRAY_SORT(${collectExpr}, (l, r) -> ${this.buildArraySortComparator(orderBy)})`;
176
+ }
177
+ // Build a lambda comparator for ARRAY_SORT that handles multi-field
178
+ // mixed-direction ordering. Each field comparison returns -1/0/1;
179
+ // fields are chained so that ties on earlier fields fall through to
180
+ // later fields.
181
+ buildArraySortComparator(orderBy) {
182
+ const result = orderBy.reduceRight((fallthrough, ob) => {
183
+ const asc = ob.dir === 'asc';
184
+ const lt = asc ? -1 : 1;
185
+ const gt = asc ? 1 : -1;
186
+ const f = ob.structField;
187
+ return [
188
+ 'CASE',
189
+ ` WHEN l.${f} IS NULL AND r.${f} IS NULL THEN ${fallthrough}`,
190
+ ` WHEN l.${f} IS NULL THEN 1`,
191
+ ` WHEN r.${f} IS NULL THEN -1`,
192
+ ` WHEN l.${f} < r.${f} THEN ${lt}`,
193
+ ` WHEN l.${f} > r.${f} THEN ${gt}`,
194
+ ` ELSE ${fallthrough}`,
195
+ 'END',
196
+ ].join('\n');
197
+ }, '0');
198
+ return result;
199
+ }
200
+ sqlAnyValueTurtle(groupSet, fieldList) {
201
+ const namedStruct = this.buildNamedStructExpression(fieldList);
202
+ return `FIRST(CASE WHEN group_set=${groupSet} THEN ${namedStruct} END) IGNORE NULLS`;
203
+ }
204
+ sqlAnyValueLastTurtle(name, groupSet, sqlName) {
205
+ return `FIRST(CASE WHEN group_set=${groupSet} THEN ${name} END) IGNORE NULLS as ${sqlName}`;
206
+ }
207
+ sqlCoaleseMeasuresInline(groupSet, fieldList) {
208
+ const namedStruct = this.buildNamedStructExpression(fieldList);
209
+ const nullStruct = 'named_struct(' +
210
+ fieldList.map(f => `'${f.rawName}', NULL`).join(', ') +
211
+ ')';
212
+ return `COALESCE(FIRST(CASE WHEN group_set=${groupSet} THEN ${namedStruct} END) IGNORE NULLS, ${nullStruct})`;
213
+ }
214
+ // Use LATERAL VIEW EXPLODE instead of LEFT JOIN LATERAL EXPLODE.
215
+ // LEFT JOIN LATERAL EXPLODE has a Databricks bug where struct field
216
+ // access on the exploded column returns null.
217
+ sqlUnnestAlias(source, alias, _fieldList, needDistinctKey, isArray, _isInNestedPipeline) {
218
+ if (isArray) {
219
+ if (needDistinctKey) {
220
+ return `LATERAL VIEW OUTER POSEXPLODE(${source}) ${alias} AS __row_id_from_${alias}, value`;
221
+ }
222
+ return `LATERAL VIEW OUTER EXPLODE(${source}) ${alias} AS value`;
223
+ }
224
+ if (needDistinctKey) {
225
+ return `LATERAL VIEW OUTER POSEXPLODE(${source}) ${alias}_outer AS __row_id_from_${alias}, ${alias}`;
226
+ }
227
+ return `LATERAL VIEW OUTER EXPLODE(${source}) ${alias}_outer AS ${alias}`;
228
+ }
229
+ sqlUnnestPipelineHead(isSingleton, sourceSQLExpression, _fieldList) {
230
+ let p = sourceSQLExpression;
231
+ if (isSingleton) {
232
+ p = `ARRAY(${p})`;
233
+ }
234
+ return `EXPLODE(${p})`;
235
+ }
236
+ // Two-chunk MD5 hash: upper 15 hex chars * 4294967296 + lower 8 hex chars
237
+ // gives ~88 bits of entropy, matching the pattern used by Snowflake/Trino.
238
+ sqlSumDistinctHashedKey(sqlDistinctKey) {
239
+ const castKey = `CAST(${sqlDistinctKey} AS STRING)`;
240
+ const upper = `CAST(CONV(SUBSTRING(MD5(${castKey}), 1, 15), 16, 10) AS DECIMAL(38,0)) * 4294967296`;
241
+ const lower = `CAST(CONV(SUBSTRING(MD5(${castKey}), 16, 8), 16, 10) AS DECIMAL(38,0))`;
242
+ return `(${upper} + ${lower})`;
243
+ }
244
+ // Scale the value to integer before adding to hash, then divide after
245
+ // subtraction. This keeps all arithmetic in integer space and avoids
246
+ // DECIMAL precision/overflow issues (Databricks max is DECIMAL(38,x)).
247
+ sqlSumDistinct(key, value, funcName) {
248
+ const hashKey = this.sqlSumDistinctHashedKey(key);
249
+ const scale = 100000000.0;
250
+ const v = `CAST(COALESCE(${value},0)*${scale} AS DECIMAL(38,0))`;
251
+ const sqlSum = `(SUM(DISTINCT ${hashKey} + ${v}) - SUM(DISTINCT ${hashKey}))/${scale}`;
252
+ if (funcName === 'SUM') {
253
+ return sqlSum;
254
+ }
255
+ else if (funcName === 'AVG') {
256
+ return `(${sqlSum})/NULLIF(COUNT(DISTINCT CASE WHEN ${value} IS NOT NULL THEN ${key} END),0)`;
257
+ }
258
+ throw new Error(`Unknown Symmetric Aggregate function ${funcName}`);
259
+ }
260
+ sqlStringAggDistinct(distinctKey, valueSQL, separatorSQL) {
261
+ const sep = separatorSQL.length > 0 ? separatorSQL : "','";
262
+ return `ARRAY_JOIN(TRANSFORM(COLLECT_SET(NAMED_STRUCT('k', ${distinctKey}, 'v', ${valueSQL})), x -> x.v), ${sep})`;
263
+ }
264
+ sqlGenerateUUID() {
265
+ return 'UUID()';
266
+ }
267
+ sqlFieldReference(parentAlias, _parentType, childName, _childType) {
268
+ if (childName === '__row_id') {
269
+ return `__row_id_from_${parentAlias}`;
270
+ }
271
+ return `${parentAlias}.${this.sqlMaybeQuoteIdentifier(childName)}`;
272
+ }
273
+ sqlCreateFunction(id, funcText) {
274
+ return `CREATE TEMPORARY FUNCTION ${id}(param STRING) RETURNS STRING RETURN (\n${(0, utils_1.indent)(funcText)}\n);\n`;
275
+ }
276
+ sqlCreateFunctionCombineLastStage(lastStageName, fieldList) {
277
+ const namedStruct = this.buildNamedStructExpression(fieldList);
278
+ return `SELECT COLLECT_LIST(${namedStruct}) FROM ${lastStageName}\n`;
279
+ }
280
+ sqlSelectAliasAsStruct(alias, fieldList) {
281
+ const fields = fieldList
282
+ .map(f => `${alias}.${this.sqlMaybeQuoteIdentifier(f.rawName)}`)
283
+ .join(', ');
284
+ return `STRUCT(${fields})`;
285
+ }
286
+ sqlMaybeQuoteIdentifier(identifier) {
287
+ return '`' + identifier.replace(/`/g, '``') + '`';
288
+ }
289
+ sqlCreateTableAsSelect(tableName, sql) {
290
+ return `CREATE TABLE ${tableName} AS ${sql}`;
291
+ }
292
+ sqlNowExpr() {
293
+ return 'CURRENT_TIMESTAMP()';
294
+ }
295
+ sqlConvertToCivilTime(expr, timezone, _typeDef) {
296
+ // Databricks has no timestamptz type; timestamps are stored as UTC
297
+ // Convert from UTC to local timezone for civil time operations
298
+ return {
299
+ sql: `FROM_UTC_TIMESTAMP(${expr}, '${timezone}')`,
300
+ typeDef: { type: 'timestamp' },
301
+ };
302
+ }
303
+ sqlConvertFromCivilTime(expr, timezone, _destTypeDef) {
304
+ // Convert from local timezone back to UTC
305
+ return `TO_UTC_TIMESTAMP(${expr}, '${timezone}')`;
306
+ }
307
+ sqlTruncate(expr, unit, _typeDef, _inCivilTime, _timezone) {
308
+ // Databricks DATE_TRUNC starts weeks on Monday; Malloy wants Sunday.
309
+ // Add 1 day before truncating, subtract 1 day after.
310
+ if (unit === 'week') {
311
+ return `(DATE_TRUNC('${unit}', ${expr} + INTERVAL 1 DAY) - INTERVAL 1 DAY)`;
312
+ }
313
+ return `DATE_TRUNC('${unit}', ${expr})`;
314
+ }
315
+ sqlOffsetTime(expr, op, magnitude, unit, _typeDef, _inCivilTime, _timezone) {
316
+ // Use TIMESTAMPADD which accepts expressions for magnitude,
317
+ // unlike INTERVAL which only accepts literals in Databricks.
318
+ let offsetUnit = unit.toUpperCase();
319
+ let offsetMag = magnitude;
320
+ if (unit === 'quarter') {
321
+ offsetUnit = 'MONTH';
322
+ offsetMag = `(${magnitude})*3`;
323
+ }
324
+ else if (unit === 'week') {
325
+ offsetUnit = 'DAY';
326
+ offsetMag = `(${magnitude})*7`;
327
+ }
328
+ const n = op === '+' ? offsetMag : `-(${offsetMag})`;
329
+ return `TIMESTAMPADD(${offsetUnit}, ${n}, ${expr})`;
330
+ }
331
+ sqlTimeExtractExpr(qi, te) {
332
+ const units = extractionMap[te.units] || te.units;
333
+ let extractFrom = te.e.sql;
334
+ if (malloy_types_1.TD.isTimestamp(te.e.typeDef)) {
335
+ const tz = (0, dialect_1.qtz)(qi);
336
+ if (tz) {
337
+ extractFrom = `FROM_UTC_TIMESTAMP(${extractFrom}, '${tz}')`;
338
+ }
339
+ }
340
+ if (extractionMap[te.units]) {
341
+ // DAYOFWEEK, DAYOFYEAR are functions
342
+ return `${units}(${extractFrom})`;
343
+ }
344
+ return `EXTRACT(${units} FROM ${extractFrom})`;
345
+ }
346
+ sqlCast(qi, cast) {
347
+ const srcSQL = cast.e.sql || 'internal-error-in-sql-generation';
348
+ const { op, srcTypeDef, dstTypeDef, dstSQLType } = this.sqlCastPrep(cast);
349
+ const tz = (0, dialect_1.qtz)(qi);
350
+ if (op === 'timestamp::date' && tz) {
351
+ return `CAST(FROM_UTC_TIMESTAMP(${srcSQL}, '${tz}') AS DATE)`;
352
+ }
353
+ else if (op === 'date::timestamp' && tz) {
354
+ return `TO_UTC_TIMESTAMP(CAST(${srcSQL} AS TIMESTAMP_NTZ), '${tz}')`;
355
+ }
356
+ if (!malloy_types_1.TD.eq(srcTypeDef, dstTypeDef)) {
357
+ if (cast.safe) {
358
+ return `TRY_CAST(${srcSQL} AS ${dstSQLType})`;
359
+ }
360
+ if (malloy_types_1.TD.isString(dstTypeDef)) {
361
+ return `CAST(${srcSQL} AS STRING)`;
362
+ }
363
+ return `CAST(${srcSQL} AS ${dstSQLType})`;
364
+ }
365
+ return srcSQL;
366
+ }
367
+ sqlRegexpMatch(df) {
368
+ return `REGEXP_LIKE(${df.kids.expr.sql}, ${df.kids.regex.sql})`;
369
+ }
370
+ sqlDateLiteral(_qi, literal) {
371
+ return `DATE '${literal}'`;
372
+ }
373
+ sqlTimestampLiteral(qi, literal, timezone) {
374
+ const tz = timezone || (0, dialect_1.qtz)(qi);
375
+ if (tz) {
376
+ return `TO_UTC_TIMESTAMP(TIMESTAMP_NTZ '${literal}', '${tz}')`;
377
+ }
378
+ return `TIMESTAMP '${literal}'`;
379
+ }
380
+ sqlTimestamptzLiteral(_qi, _literal, _timezone) {
381
+ throw new Error('Databricks does not support timestamptz');
382
+ }
383
+ sqlMeasureTimeExpr(df) {
384
+ let lVal = df.kids.left.sql;
385
+ let rVal = df.kids.right.sql;
386
+ if (inSeconds[df.units]) {
387
+ lVal = `UNIX_MICROS(CAST(${lVal} AS TIMESTAMP))`;
388
+ rVal = `UNIX_MICROS(CAST(${rVal} AS TIMESTAMP))`;
389
+ const duration = `(${rVal}-${lVal})`;
390
+ const divisor = inSeconds[df.units] * 1000000;
391
+ return `FLOOR(${duration}/${divisor}.0)`;
392
+ }
393
+ throw new Error(`Unknown or unhandled Databricks time unit: ${df.units}`);
394
+ }
395
+ sqlSampleTable(tableSQL, sample) {
396
+ if (sample !== undefined) {
397
+ if ((0, malloy_types_1.isSamplingEnable)(sample) && sample.enable) {
398
+ sample = this.defaultSampling;
399
+ }
400
+ if ((0, malloy_types_1.isSamplingRows)(sample)) {
401
+ return `(SELECT * FROM ${tableSQL} LIMIT ${sample.rows})`;
402
+ }
403
+ else if ((0, malloy_types_1.isSamplingPercent)(sample)) {
404
+ return `(SELECT * FROM ${tableSQL} TABLESAMPLE (${sample.percent} PERCENT))`;
405
+ }
406
+ }
407
+ return tableSQL;
408
+ }
409
+ sqlLiteralString(literal) {
410
+ const noVirgule = literal.replace(/\\/g, '\\\\');
411
+ return "'" + noVirgule.replace(/'/g, "\\'") + "'";
412
+ }
413
+ sqlLiteralRegexp(literal) {
414
+ return "'" + literal.replace(/'/g, "''") + "'";
415
+ }
416
+ getDialectFunctionOverrides() {
417
+ return (0, functions_1.expandOverrideMap)(function_overrides_1.DATABRICKS_MALLOY_STANDARD_OVERLOADS);
418
+ }
419
+ getDialectFunctions() {
420
+ return (0, functions_1.expandBlueprintMap)(dialect_functions_1.DATABRICKS_DIALECT_FUNCTIONS);
421
+ }
422
+ castToString(expression) {
423
+ return `CAST(${expression} AS STRING)`;
424
+ }
425
+ concat(...values) {
426
+ return `CONCAT(${values.join(',')})`;
427
+ }
428
+ validateTypeName(sqlType) {
429
+ return sqlType.match(/^[A-Za-z\s(),0-9_]*$/) !== null;
430
+ }
431
+ sqlLiteralArray(lit) {
432
+ const array = lit.kids.values.map(val => val.sql);
433
+ return `ARRAY(${array.join(',')})`;
434
+ }
435
+ sqlLiteralRecord(lit) {
436
+ const pairs = Object.entries(lit.kids).map(([propName, propVal]) => `${this.sqlLiteralString(propName)}, ${propVal.sql}`);
437
+ return `NAMED_STRUCT(${pairs.join(', ')})`;
438
+ }
439
+ }
440
+ exports.DatabricksDialect = DatabricksDialect;
441
+ //# sourceMappingURL=databricks.js.map
@@ -0,0 +1,2 @@
1
+ import type { DefinitionBlueprintMap } from '../functions/util';
2
+ export declare const DATABRICKS_DIALECT_FUNCTIONS: DefinitionBlueprintMap;
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+ /*
3
+ * Copyright Contributors to the Malloy project
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.DATABRICKS_DIALECT_FUNCTIONS = void 0;
8
+ const util_1 = require("../functions/util");
9
+ /*
10
+ * Databricks dialect function definitions.
11
+ *
12
+ * For simple functions, use the def() shorthand with the T convention:
13
+ * ...def('func_name', {'arg': 'type'}, 'return_type')
14
+ *
15
+ * For functions needing SQL templates or multiple overloads,
16
+ * use full DefinitionBlueprint / OverloadedDefinitionBlueprint objects.
17
+ */
18
+ // Shortcut so you can write things like: {array: T} and {dimension: T}
19
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
20
+ const T = { generic: 'T' };
21
+ // Databricks COLLECT_LIST/COLLECT_SET do not support ORDER BY inside
22
+ // the aggregate call. Ordering is not supported for string_agg on
23
+ // Databricks. See databricks-string-agg.md for details.
24
+ const string_agg = {
25
+ default_separator: {
26
+ takes: { 'value': { dimension: 'string' } },
27
+ returns: { measure: 'string' },
28
+ impl: {
29
+ sql: "ARRAY_JOIN(COLLECT_LIST(${value}), ',')",
30
+ },
31
+ },
32
+ with_separator: {
33
+ takes: {
34
+ 'value': { dimension: 'string' },
35
+ 'separator': { literal: 'string' },
36
+ },
37
+ returns: { measure: 'string' },
38
+ impl: {
39
+ sql: 'ARRAY_JOIN(COLLECT_LIST(${value}), ${separator})',
40
+ },
41
+ },
42
+ };
43
+ const string_agg_distinct = {
44
+ default_separator: {
45
+ ...string_agg['default_separator'],
46
+ isSymmetric: true,
47
+ impl: {
48
+ sql: "ARRAY_JOIN(COLLECT_SET(${value}), ',')",
49
+ },
50
+ },
51
+ with_separator: {
52
+ ...string_agg['with_separator'],
53
+ isSymmetric: true,
54
+ impl: {
55
+ sql: 'ARRAY_JOIN(COLLECT_SET(${value}), ${separator})',
56
+ },
57
+ },
58
+ };
59
+ exports.DATABRICKS_DIALECT_FUNCTIONS = {
60
+ // Aggregate functions
61
+ string_agg,
62
+ string_agg_distinct,
63
+ // Scalar functions
64
+ ...(0, util_1.def)('repeat', { 'str': 'string', 'n': 'number' }, 'string'),
65
+ ...(0, util_1.def)('reverse', { 'str': 'string' }, 'string'),
66
+ };
67
+ //# sourceMappingURL=dialect_functions.js.map
@@ -0,0 +1,2 @@
1
+ import type { MalloyStandardFunctionImplementations as OverrideMap } from '../functions/malloy_standard_functions';
2
+ export declare const DATABRICKS_MALLOY_STANDARD_OVERLOADS: OverrideMap;
@@ -0,0 +1,68 @@
1
+ "use strict";
2
+ /*
3
+ * Copyright Contributors to the Malloy project
4
+ * SPDX-License-Identifier: MIT
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.DATABRICKS_MALLOY_STANDARD_OVERLOADS = void 0;
8
+ // Template strings use ${...values} for Malloy parameter interpolation.
9
+ // The ELSE branch uses a JS template literal to inject `name`, so the
10
+ // Malloy parameter reference must be escaped as \${...values}.
11
+ function greatestOrLeastSQL(name) {
12
+ return ('CASE' +
13
+ ' WHEN SIZE(FILTER(ARRAY(${...values}), x -> x IS NULL)) > 0' +
14
+ ' THEN NULL' +
15
+ ` ELSE ${name}(\${...values})` +
16
+ ' END');
17
+ }
18
+ exports.DATABRICKS_MALLOY_STANDARD_OVERLOADS = {
19
+ // Databricks REGEXP_EXTRACT defaults to group index 1 (first capture group),
20
+ // but Malloy expects the full match (group 0). Explicitly pass idx=0.
21
+ regexp_extract: { sql: 'REGEXP_EXTRACT(${value}, ${pattern}, 0)' },
22
+ replace: {
23
+ regular_expression: {
24
+ sql: 'REGEXP_REPLACE(${value}, ${pattern}, ${replacement})',
25
+ },
26
+ },
27
+ trunc: {
28
+ to_integer: {
29
+ sql: 'CAST(${value} AS BIGINT)',
30
+ },
31
+ to_precision: {
32
+ sql: '(ABS(FLOOR(${value} * POW(10,${precision}))/POW(10,${precision}))*IF(${value} < 0, -1, 1))',
33
+ },
34
+ },
35
+ log: { sql: 'LOG(${base},${value})' },
36
+ div: { sql: 'FLOOR(${dividend} / ${divisor})' },
37
+ strpos: { sql: 'LOCATE(${search_string},${test_string})' },
38
+ starts_with: { sql: 'COALESCE(STARTSWITH(${value},${prefix}), false)' },
39
+ ends_with: { sql: 'COALESCE(ENDSWITH(${value},${suffix}), false)' },
40
+ trim: {
41
+ characters: {
42
+ sql: 'TRIM(BOTH ${trim_characters} FROM ${value})',
43
+ },
44
+ },
45
+ ltrim: {
46
+ characters: {
47
+ sql: 'TRIM(LEADING ${trim_characters} FROM ${value})',
48
+ },
49
+ },
50
+ rtrim: {
51
+ characters: {
52
+ sql: 'TRIM(TRAILING ${trim_characters} FROM ${value})',
53
+ },
54
+ },
55
+ byte_length: { sql: 'OCTET_LENGTH(${value})' },
56
+ chr: { sql: 'CHR(${value})' },
57
+ // Databricks has no IS_INF/IS_NAN functions; compare to special float values
58
+ is_inf: {
59
+ sql: "COALESCE(${value} = DOUBLE('infinity') OR ${value} = DOUBLE('-infinity'), false)",
60
+ },
61
+ is_nan: { sql: "COALESCE(${value} = DOUBLE('NaN'), false)" },
62
+ // Databricks ASCII() returns the Unicode codepoint of the first character
63
+ unicode: { function: 'ASCII' },
64
+ // Databricks GREATEST/LEAST skip nulls; Malloy expects null propagation
65
+ greatest: { sql: greatestOrLeastSQL('GREATEST') },
66
+ least: { sql: greatestOrLeastSQL('LEAST') },
67
+ };
68
+ //# sourceMappingURL=function_overrides.js.map
@@ -0,0 +1 @@
1
+ export * from './databricks';