@malloydata/db-snowflake 0.0.375 → 0.0.377

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/index.js +29 -2
  2. package/dist/index.js.map +1 -1
  3. package/dist/snowflake_connection.d.ts +48 -13
  4. package/dist/snowflake_connection.js +144 -228
  5. package/dist/snowflake_connection.js.map +1 -1
  6. package/dist/snowflake_connection.spec.js +179 -14
  7. package/dist/snowflake_connection.spec.js.map +1 -1
  8. package/dist/snowflake_sample_strategy.spec.js +97 -0
  9. package/dist/snowflake_sample_strategy.spec.js.map +1 -0
  10. package/dist/snowflake_table_name.d.ts +19 -0
  11. package/dist/snowflake_table_name.js +80 -0
  12. package/dist/snowflake_table_name.js.map +1 -0
  13. package/dist/snowflake_variant_schema.d.ts +43 -0
  14. package/dist/snowflake_variant_schema.js +203 -0
  15. package/dist/snowflake_variant_schema.js.map +1 -0
  16. package/dist/snowflake_variant_schema.spec.js +150 -0
  17. package/dist/snowflake_variant_schema.spec.js.map +1 -0
  18. package/package.json +2 -2
  19. package/src/index.ts +34 -1
  20. package/src/snowflake_connection.spec.ts +219 -15
  21. package/src/snowflake_connection.ts +218 -262
  22. package/src/snowflake_sample_strategy.spec.ts +130 -0
  23. package/src/snowflake_table_name.ts +94 -0
  24. package/src/snowflake_variant_schema.spec.ts +188 -0
  25. package/src/snowflake_variant_schema.ts +301 -0
  26. package/dist/snowflake_executor.spec.js +0 -89
  27. package/dist/snowflake_executor.spec.js.map +0 -1
  28. package/dist/snowflake_setup.spec.js +0 -76
  29. package/dist/snowflake_setup.spec.js.map +0 -1
  30. package/src/snowflake_executor.spec.ts +0 -103
  31. package/src/snowflake_setup.spec.ts +0 -56
  32. /package/dist/{snowflake_executor.spec.d.ts → snowflake_sample_strategy.spec.d.ts} +0 -0
  33. /package/dist/{snowflake_setup.spec.d.ts → snowflake_variant_schema.spec.d.ts} +0 -0
@@ -34,27 +34,69 @@ import type {
34
34
  StructDef,
35
35
  QueryRecord,
36
36
  TestableConnection,
37
- Dialect,
38
- RecordDef,
39
- AtomicFieldDef,
40
- ArrayDef,
41
37
  SQLSourceRequest,
42
38
  } from '@malloydata/malloy';
43
- import {
44
- SnowflakeDialect,
45
- TinyParser,
46
- mkArrayDef,
47
- sqlKey,
48
- makeDigest,
49
- } from '@malloydata/malloy';
39
+ import {SnowflakeDialect, sqlKey, makeDigest} from '@malloydata/malloy';
50
40
  import {BaseConnection} from '@malloydata/malloy/connection';
51
41
 
52
42
  import {SnowflakeExecutor} from './snowflake_executor';
43
+ import {
44
+ accumulateVariantPath,
45
+ buildTopLevelField,
46
+ createVariantSchemaState,
47
+ PathParser,
48
+ seedTopLevelShape,
49
+ } from './snowflake_variant_schema';
50
+ import type {NestedColumn} from './snowflake_variant_schema';
51
+ import {parseSnowflakeTableName} from './snowflake_table_name';
53
52
  import type {ConnectionOptions} from 'snowflake-sdk';
54
53
  import type {Options as PoolOptions} from 'generic-pool';
55
54
 
56
55
  type namespace = {database: string; schema: string};
57
56
 
57
+ /**
58
+ * Output of the INFORMATION_SCHEMA.TABLES probe. Undefined when the
59
+ * probe didn't run (non-parseable name) or couldn't find numeric size
60
+ * info (views, missing permissions).
61
+ */
62
+ export interface TableSizeProbe {
63
+ bytes: number;
64
+ rowCount: number;
65
+ }
66
+
67
+ /**
68
+ * Three-way tier that drives variant schema sampling. Extracted as a
69
+ * pure function so cost-policy decisions are unit-testable.
70
+ *
71
+ * full-scan-then-sample: probe confirmed a small base table. One
72
+ * full scan catches rare fields. On failure, fall through to the
73
+ * sample chain rather than accept opaque variant.
74
+ *
75
+ * tablesample-only: probe confirmed a base table above the small
76
+ * threshold. TABLESAMPLE BLOCK is safe (reads a few micro
77
+ * partitions). Plain LIMIT without a WHERE is unsafe on large
78
+ * partitioned tables, so we skip the LIMIT fallback — we'd rather
79
+ * degrade to variant than issue a runaway query.
80
+ *
81
+ * tablesample-then-limit: probe gave no size info (views, temp
82
+ * views, exotic names). We can't distinguish a small view from a
83
+ * view over a petabyte table, so we do best-effort sampling. This
84
+ * is the acknowledged "can't help you" case from the design doc.
85
+ */
86
+ export type SampleStrategy =
87
+ | 'full-scan-then-sample'
88
+ | 'tablesample-only'
89
+ | 'tablesample-then-limit';
90
+
91
+ export function pickSampleStrategy(
92
+ probe: TableSizeProbe | undefined,
93
+ fullScanMaxBytes: number
94
+ ): SampleStrategy {
95
+ if (probe === undefined) return 'tablesample-then-limit';
96
+ if (probe.bytes <= fullScanMaxBytes) return 'full-scan-then-sample';
97
+ return 'tablesample-only';
98
+ }
99
+
58
100
  export interface SnowflakeConnectionOptions {
59
101
  // snowflake sdk connection options
60
102
  connOptions?: ConnectionOptions;
@@ -74,155 +116,18 @@ export interface SnowflakeConnectionOptions {
74
116
  // Timeout for the variant schema sampling query (default 15 seconds)
75
117
  schemaSampleTimeoutMs?: number;
76
118
 
77
- // SQL statements to run when a connection is acquired from the pool
78
- setupSQL?: string;
79
- }
80
-
81
- type PathChain =
82
- | {arrayRef: true; next?: PathChain}
83
- | {name: string; next?: PathChain};
84
-
85
- class SnowField {
86
- constructor(
87
- readonly name: string,
88
- readonly type: string,
89
- readonly dialect: Dialect
90
- ) {}
91
- fieldDef(): AtomicFieldDef {
92
- return {
93
- ...this.dialect.sqlTypeToMalloyType(this.type),
94
- name: this.name,
95
- };
96
- }
97
- walk(_path: PathChain, _fieldType: string): void {
98
- throw new Error(
99
- 'SNOWWFLAKE SCHEMA PARSE ERROR: Should not walk through fields'
100
- );
101
- }
102
- static make(name: string, fieldType: string, d: Dialect) {
103
- if (fieldType === 'array') {
104
- return new SnowArray(name, d);
105
- } else if (fieldType === 'object') {
106
- return new SnowObject(name, d);
107
- }
108
- return new SnowField(name, fieldType, d);
109
- }
110
- }
111
-
112
- class SnowObject extends SnowField {
113
- fieldMap = new Map<string, SnowField>();
114
- constructor(name: string, d: Dialect) {
115
- super(name, 'object', d);
116
- }
117
-
118
- get fields(): AtomicFieldDef[] {
119
- const fields: AtomicFieldDef[] = [];
120
- for (const [_, fieldObj] of this.fieldMap) {
121
- fields.push(fieldObj.fieldDef());
122
- }
123
- return fields;
124
- }
125
-
126
- fieldDef(): RecordDef {
127
- const rec: RecordDef = {
128
- type: 'record',
129
- name: this.name,
130
- fields: this.fields,
131
- join: 'one',
132
- };
133
- return rec;
134
- }
135
-
136
- walk(path: PathChain, fieldType: string) {
137
- if ('name' in path) {
138
- const field = this.fieldMap.get(path.name);
139
- if (path.next) {
140
- if (field instanceof SnowObject || field instanceof SnowArray) {
141
- field.walk(path.next, fieldType);
142
- return;
143
- }
144
- // Field is missing or is a scalar leaf — the variant data has
145
- // inconsistent structure across rows. Degrade to opaque variant.
146
- this.fieldMap.set(
147
- path.name,
148
- new SnowField(path.name, 'variant', this.dialect)
149
- );
150
- return;
151
- } else {
152
- if (!field) {
153
- this.fieldMap.set(
154
- path.name,
155
- SnowField.make(path.name, fieldType, this.dialect)
156
- );
157
- return;
158
- }
159
- }
160
- return;
161
- }
162
- // Array reference in an object context — inconsistent structure.
163
- // Ignore this path; the object keeps whatever fields it already has.
164
- }
165
- }
166
-
167
- class SnowArray extends SnowField {
168
- arrayOf = 'unknown';
169
- objectChild?: SnowObject;
170
- arrayChild?: SnowArray;
171
- constructor(name: string, d: Dialect) {
172
- super(name, 'array', d);
173
- }
174
-
175
- isArrayOf(type: string) {
176
- if (this.arrayOf !== 'unknown') {
177
- this.arrayOf = 'variant';
178
- return;
179
- }
180
- this.arrayOf = type;
181
- if (type === 'object') {
182
- this.objectChild = new SnowObject('', this.dialect);
183
- } else if (type === 'array') {
184
- this.arrayChild = new SnowArray('', this.dialect);
185
- }
186
- }
119
+ // Row limit used inside the variant schema sample (default 1000). When the
120
+ // probe reports the table is small enough to full-scan, this limit is
121
+ // ignored.
122
+ schemaSampleRowLimit?: number;
187
123
 
188
- fieldDef(): ArrayDef {
189
- if (this.objectChild) {
190
- const t = mkArrayDef(
191
- {type: 'record', fields: this.objectChild.fields},
192
- this.name
193
- );
194
- return t;
195
- }
196
- if (this.arrayChild) {
197
- return mkArrayDef(this.arrayChild.fieldDef(), this.name);
198
- }
199
- return mkArrayDef(
200
- this.dialect.sqlTypeToMalloyType(this.arrayOf),
201
- this.name
202
- );
203
- }
124
+ // Byte threshold below which variant schema inference skips sampling and
125
+ // full-scans the table instead (default 100 MB). A full scan catches rare
126
+ // fields that a sample would miss.
127
+ schemaSampleFullScanMaxBytes?: number;
204
128
 
205
- walk(path: PathChain, fieldType: string) {
206
- if ('arrayRef' in path) {
207
- if (path.next) {
208
- const next = this.arrayChild || this.objectChild;
209
- if (next) {
210
- next.walk(path.next, fieldType);
211
- return;
212
- }
213
- // Array elements were scalars but now we see deeper structure —
214
- // inconsistent variant data. Degrade to variant array.
215
- this.arrayOf = 'variant';
216
- return;
217
- } else {
218
- this.isArrayOf(fieldType);
219
- return;
220
- }
221
- }
222
- // Name reference in an array context — inconsistent structure.
223
- // Degrade to variant array.
224
- this.arrayOf = 'variant';
225
- }
129
+ // SQL statements to run when a connection is acquired from the pool
130
+ setupSQL?: string;
226
131
  }
227
132
 
228
133
  /**
@@ -248,6 +153,8 @@ export class SnowflakeConnection
248
153
  private queryOptions: RunSQLOptions;
249
154
  private timeoutMs: number;
250
155
  private schemaSampleTimeoutMs: number;
156
+ private schemaSampleRowLimit: number;
157
+ private schemaSampleFullScanMaxBytes: number;
251
158
  private setupSQL: string | undefined;
252
159
 
253
160
  constructor(
@@ -271,6 +178,9 @@ export class SnowflakeConnection
271
178
  this.queryOptions = options?.queryOptions ?? {};
272
179
  this.timeoutMs = options?.timeoutMs ?? TIMEOUT_MS;
273
180
  this.schemaSampleTimeoutMs = options?.schemaSampleTimeoutMs ?? 15_000;
181
+ this.schemaSampleRowLimit = options?.schemaSampleRowLimit ?? 1000;
182
+ this.schemaSampleFullScanMaxBytes =
183
+ options?.schemaSampleFullScanMaxBytes ?? 100_000_000;
274
184
  }
275
185
 
276
186
  get dialectName(): string {
@@ -366,7 +276,7 @@ export class SnowflakeConnection
366
276
  ): Promise<void> {
367
277
  const infoQuery = `DESCRIBE TABLE ${tablePath}`;
368
278
  const rows = await this.executor.batch(infoQuery);
369
- const variants: string[] = [];
279
+ const nestedColumns: NestedColumn[] = [];
370
280
  const notVariant = new Map<string, boolean>();
371
281
  for (const row of rows) {
372
282
  // data types look like `VARCHAR(1234)` or `NUMBER(10,2)`
@@ -374,8 +284,12 @@ export class SnowflakeConnection
374
284
  const baseType = fullType.split('(')[0];
375
285
  const name = row['name'] as string;
376
286
 
377
- if (['variant', 'array', 'object'].includes(baseType)) {
378
- variants.push(name);
287
+ if (
288
+ baseType === 'variant' ||
289
+ baseType === 'array' ||
290
+ baseType === 'object'
291
+ ) {
292
+ nestedColumns.push({kind: baseType, name});
379
293
  } else {
380
294
  notVariant.set(name, true);
381
295
  // For NUMBER types, pass full string so dialect can inspect scale
@@ -390,74 +304,167 @@ export class SnowflakeConnection
390
304
  }
391
305
  }
392
306
  // VARIANT, ARRAY, and OBJECT columns don't have schema in metadata —
393
- // we have to sample actual data and inspect it to discover the structure.
394
- // This is inherently heuristic (we only look at 100 rows) and can be
395
- // slow on large partitioned tables or expensive views.
396
- if (variants.length > 0) {
397
- const variantArgs = variants.map(v => `'${v}', "${v}"`).join(', ');
398
- // Build the analysis query that flattens sampled rows and detects
399
- // the type of each leaf path. We only construct from variant columns
400
- // (not *) to avoid flattening the entire row on wide tables.
401
- // Paths with multiple types across the sample are dropped (HAVING
402
- // count(*) <= 1), and nulls are ignored.
307
+ // we have to sample actual data and inspect it to discover the
308
+ // structure. Cost control happens in two places:
309
+ // 1. project only the nested columns (via object_construct), so
310
+ // bytes-on-wire are bounded by actual variant content.
311
+ // 2. tier the sampling strategy by probeTableSize (see
312
+ // pickSampleStrategy): small base tables get a full scan;
313
+ // large base tables get TABLESAMPLE only (no unsafe LIMIT
314
+ // fallback); unknown-size sources (views, temp views) get
315
+ // the best-effort TABLESAMPLE→LIMIT chain.
316
+ if (nestedColumns.length > 0) {
317
+ const variantArgs = nestedColumns
318
+ .map(v => `'${v.name}', "${v.name}"`)
319
+ .join(', ');
320
+ // Flatten sampled rows and emit each distinct (path, type) pair.
321
+ // Conflicting pairs at the same path flow through to mergeShape,
322
+ // which collapses them to variant — that is how we honestly
323
+ // surface mixed-type fields to the user.
403
324
  const makeSampleQuery = (sampleClause: string) => `
404
- select path, min(type) as type
405
- from (
406
- select
407
- regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
408
- case
409
- when typeof(value) = 'INTEGER' then 'decimal'
410
- when typeof(value) = 'DOUBLE' then 'decimal'
411
- else lower(typeof(value)) end as type
412
- from
413
- (${sampleClause})
414
- ,table(flatten(input => o, recursive => true)) as meta
415
- group by 1,2
416
- )
417
- where type != 'null_value'
418
- group BY 1
419
- having count(*) <=1
420
- order by path;
325
+ select
326
+ regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
327
+ case
328
+ when typeof(value) = 'INTEGER' then 'decimal'
329
+ when typeof(value) = 'DOUBLE' then 'decimal'
330
+ else lower(typeof(value)) end as type
331
+ from
332
+ (${sampleClause})
333
+ ,table(flatten(input => o, recursive => true)) as meta
334
+ where typeof(value) != 'NULL_VALUE'
335
+ group by 1, 2
336
+ order by 1;
421
337
  `;
422
- const limitClause =
423
- `select object_construct(${variantArgs}) o` +
424
- ` from ${tablePath} limit 100`;
425
- // Try TABLESAMPLE first — it picks random micro-partitions without
426
- // scanning the whole table, which avoids the full-scan problem on
427
- // large partitioned tables. TABLESAMPLE only works on base tables,
428
- // not views, so if it fails we fall back to a plain LIMIT 100.
429
- const tablesampleClause =
430
- `select object_construct(${variantArgs}) o` +
431
- ` from ${tablePath} TABLESAMPLE BLOCK (1) limit 100`;
432
- const fieldPathRows = await this.runSchemaSample(
433
- makeSampleQuery(tablesampleClause),
434
- makeSampleQuery(limitClause)
338
+ const projectVariants = `select object_construct(${variantArgs}) o`;
339
+ const probe = await this.probeTableSize(tablePath);
340
+ const strategy = pickSampleStrategy(
341
+ probe,
342
+ this.schemaSampleFullScanMaxBytes
435
343
  );
344
+ const n = this.schemaSampleRowLimit;
345
+ let fieldPathRows: QueryRecord[] | undefined;
346
+
347
+ if (strategy === 'full-scan-then-sample') {
348
+ // Small base table: one full scan catches rare fields that
349
+ // sampling would miss. tryBatch so a failure doesn't poison
350
+ // the pool connection (temp views live on it). On failure we
351
+ // fall through to the sample path so a slow or timed-out full
352
+ // scan still gets partial structure.
353
+ fieldPathRows =
354
+ (await this.executor.tryBatch(
355
+ makeSampleQuery(`${projectVariants} from ${tablePath}`),
356
+ {},
357
+ this.schemaSampleTimeoutMs
358
+ )) ?? undefined;
359
+ }
436
360
 
437
361
  if (fieldPathRows === undefined) {
438
- // Both attempts failed or timed out — treat variants as opaque.
439
- for (const name of variants) {
440
- structDef.fields.push({type: 'sql native', rawType: 'variant', name});
362
+ const tablesampleQuery = makeSampleQuery(
363
+ `${projectVariants} from ${tablePath} TABLESAMPLE BLOCK (1) limit ${n}`
364
+ );
365
+ if (strategy === 'tablesample-only') {
366
+ // Known-large base table: TABLESAMPLE is safe (reads a few
367
+ // micro-partitions), plain LIMIT without a WHERE can be
368
+ // catastrophic on large partitioned tables. If TABLESAMPLE
369
+ // fails here we accept variant rather than risk an unbounded
370
+ // scan.
371
+ fieldPathRows =
372
+ (await this.executor.tryBatch(
373
+ tablesampleQuery,
374
+ {},
375
+ this.schemaSampleTimeoutMs
376
+ )) ?? undefined;
377
+ } else {
378
+ // Unknown size (view, temp view, non-parseable name) or
379
+ // full-scan fallback: best-effort TABLESAMPLE→LIMIT chain.
380
+ // The LIMIT fallback is the acknowledged "can't help" case
381
+ // for views over large partitioned tables.
382
+ fieldPathRows = await this.runSchemaSample(
383
+ tablesampleQuery,
384
+ makeSampleQuery(`${projectVariants} from ${tablePath} limit ${n}`)
385
+ );
441
386
  }
442
- } else {
443
- // Take the schema in list form and convert it into a tree.
444
- const rootObject = new SnowObject('__root__', this.dialect);
387
+ }
388
+
389
+ const state = createVariantSchemaState();
390
+ // Snowflake nested-schema inference follows these rules:
391
+ // - top-level ARRAY/OBJECT from DESCRIBE are authoritative
392
+ // - descendant paths imply ancestor shape
393
+ // - conflicting shapes degrade only that prefix to variant
394
+ // - every top-level nested column still produces a field
395
+ for (const nestedColumn of nestedColumns) {
396
+ seedTopLevelShape(state, nestedColumn);
397
+ }
398
+
399
+ if (fieldPathRows !== undefined) {
445
400
  for (const f of fieldPathRows) {
446
401
  const pathString = f['PATH']?.valueOf().toString();
447
402
  const fieldType = f['TYPE']?.valueOf().toString();
448
403
  if (pathString === undefined || fieldType === undefined) continue;
449
404
  const pathParser = new PathParser(pathString);
450
- const path = pathParser.pathChain();
451
- if ('name' in path && notVariant.get(path.name)) {
405
+ const segments = pathParser.segments();
406
+ const topLevel = segments[0];
407
+ if (topLevel?.kind !== 'name' || notVariant.get(topLevel.name)) {
452
408
  continue;
453
409
  }
454
- rootObject.walk(path, fieldType);
410
+ accumulateVariantPath(state, segments, fieldType);
455
411
  }
456
- structDef.fields.push(...rootObject.fields);
412
+ }
413
+
414
+ // Always emit one field per top-level nested column from DESCRIBE, even
415
+ // if sampling produced no usable descendant paths.
416
+ for (const nestedColumn of nestedColumns) {
417
+ structDef.fields.push(
418
+ buildTopLevelField(nestedColumn, state, this.dialect)
419
+ );
457
420
  }
458
421
  }
459
422
  }
460
423
 
424
+ /**
425
+ * Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
426
+ * and byte size of tablePath. Returns undefined when the name doesn't
427
+ * parse as a two- or three-part identifier, when the probe query fails,
428
+ * or when the row has no numeric BYTES (views and external tables
429
+ * typically report NULL).
430
+ *
431
+ * Two-part `schema.table` names use the current database's
432
+ * INFORMATION_SCHEMA; three-part `db.schema.table` names address
433
+ * INFORMATION_SCHEMA in the named database. Identifiers are parsed
434
+ * with Snowflake's quoting rules so bare parts case-fold to upper and
435
+ * quoted parts are compared verbatim against the catalog.
436
+ */
437
+ private async probeTableSize(
438
+ tablePath: string
439
+ ): Promise<TableSizeProbe | undefined> {
440
+ const parsed = parseSnowflakeTableName(tablePath);
441
+ if (parsed === undefined || parsed.schema === undefined) return undefined;
442
+ const quoteLit = (s: string) => s.replace(/'/g, "''");
443
+ const dbQualifier = parsed.database ? `${parsed.database.sql}.` : '';
444
+ const rows = await this.executor.tryBatch(
445
+ `select row_count as rc, bytes as by
446
+ from ${dbQualifier}information_schema.tables
447
+ where table_schema = '${quoteLit(parsed.schema.literal)}'
448
+ and table_name = '${quoteLit(parsed.table.literal)}'
449
+ limit 1`,
450
+ {},
451
+ this.schemaSampleTimeoutMs
452
+ );
453
+ if (!rows || rows.length === 0) return undefined;
454
+ const row = rows[0];
455
+ const bytesRaw = row['BY'] ?? row['by'];
456
+ const rowsRaw = row['RC'] ?? row['rc'];
457
+ // Views and external tables surface null BYTES / ROW_COUNT; treat
458
+ // that as "unknown size" so we don't classify them as small and
459
+ // launch a full scan against something potentially huge.
460
+ if (bytesRaw === null || bytesRaw === undefined) return undefined;
461
+ if (rowsRaw === null || rowsRaw === undefined) return undefined;
462
+ const bytes = Number(bytesRaw);
463
+ const rowCount = Number(rowsRaw);
464
+ if (!Number.isFinite(bytes) || !Number.isFinite(rowCount)) return undefined;
465
+ return {bytes, rowCount};
466
+ }
467
+
461
468
  /**
462
469
  * Try to run a schema sampling query, with fallback.
463
470
  * First tries the primary query (e.g. using TABLESAMPLE for speed).
@@ -535,54 +542,3 @@ export class SnowflakeConnection
535
542
  return tableName;
536
543
  }
537
544
  }
538
-
539
- export class PathParser extends TinyParser {
540
- constructor(pathName: string) {
541
- super(pathName, {
542
- quoted: /^'(\\'|[^'])*'/,
543
- array_of: /^\[\*]/,
544
- char: /^[[.\]]/,
545
- number: /^\d+/,
546
- word: /^\w+/,
547
- });
548
- }
549
-
550
- getName() {
551
- const nameStart = this.next();
552
- if (nameStart.type === 'word') {
553
- return nameStart.text;
554
- }
555
- if (nameStart.type === '[') {
556
- const quotedName = this.next('quoted');
557
- this.next(']');
558
- return quotedName.text;
559
- }
560
- throw this.parseError('Expected column name');
561
- }
562
-
563
- pathChain(): PathChain {
564
- const chain: PathChain = {name: this.getName()};
565
- let node: PathChain = chain;
566
- for (;;) {
567
- const sep = this.next();
568
- if (sep.type === 'eof') {
569
- return chain;
570
- }
571
- if (sep.type === '.') {
572
- node.next = {name: this.next('word').text};
573
- node = node.next;
574
- } else if (sep.type === 'array_of') {
575
- node.next = {arrayRef: true};
576
- node = node.next;
577
- } else if (sep.type === '[') {
578
- // Actually a dot access through a quoted name
579
- const quoted = this.next('quoted');
580
- node.next = {name: quoted.text};
581
- node = node.next;
582
- this.next(']');
583
- } else {
584
- throw this.parseError(`Unexpected ${sep.type}`);
585
- }
586
- }
587
- }
588
- }
@@ -0,0 +1,130 @@
1
+ /*
2
+ * Copyright Contributors to the Malloy project
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ import {pickSampleStrategy} from './snowflake_connection';
7
+ import {parseSnowflakeTableName} from './snowflake_table_name';
8
+
9
+ describe('pickSampleStrategy', () => {
10
+ const threshold = 100_000_000;
11
+
12
+ test('no probe → best-effort tablesample-then-limit', () => {
13
+ expect(pickSampleStrategy(undefined, threshold)).toBe(
14
+ 'tablesample-then-limit'
15
+ );
16
+ });
17
+
18
+ test('probe at or below threshold → full-scan-then-sample', () => {
19
+ expect(pickSampleStrategy({bytes: 0, rowCount: 0}, threshold)).toBe(
20
+ 'full-scan-then-sample'
21
+ );
22
+ expect(pickSampleStrategy({bytes: threshold, rowCount: 1}, threshold)).toBe(
23
+ 'full-scan-then-sample'
24
+ );
25
+ });
26
+
27
+ test('probe above threshold → tablesample-only (no unsafe LIMIT fallback)', () => {
28
+ expect(
29
+ pickSampleStrategy({bytes: threshold + 1, rowCount: 1}, threshold)
30
+ ).toBe('tablesample-only');
31
+ expect(
32
+ pickSampleStrategy(
33
+ {bytes: 10_000_000_000, rowCount: 1_000_000_000},
34
+ threshold
35
+ )
36
+ ).toBe('tablesample-only');
37
+ });
38
+
39
+ test('threshold=0 forces every probed table into tablesample-only', () => {
40
+ expect(pickSampleStrategy({bytes: 1, rowCount: 1}, 0)).toBe(
41
+ 'tablesample-only'
42
+ );
43
+ });
44
+ });
45
+
46
+ describe('parseSnowflakeTableName', () => {
47
+ test('single bare identifier', () => {
48
+ expect(parseSnowflakeTableName('aircraft')).toEqual({
49
+ table: {literal: 'AIRCRAFT', sql: 'AIRCRAFT', quoted: false},
50
+ });
51
+ });
52
+
53
+ test('two-part bare name uppercases both parts', () => {
54
+ expect(parseSnowflakeTableName('malloytest.aircraft')).toEqual({
55
+ schema: {literal: 'MALLOYTEST', sql: 'MALLOYTEST', quoted: false},
56
+ table: {literal: 'AIRCRAFT', sql: 'AIRCRAFT', quoted: false},
57
+ });
58
+ });
59
+
60
+ test('three-part bare name', () => {
61
+ expect(parseSnowflakeTableName('db.sch.t')).toEqual({
62
+ database: {literal: 'DB', sql: 'DB', quoted: false},
63
+ schema: {literal: 'SCH', sql: 'SCH', quoted: false},
64
+ table: {literal: 'T', sql: 'T', quoted: false},
65
+ });
66
+ });
67
+
68
+ test('quoted identifier preserves case', () => {
69
+ expect(parseSnowflakeTableName('"MyDb"."schema"."t"')).toEqual({
70
+ database: {literal: 'MyDb', sql: '"MyDb"', quoted: true},
71
+ schema: {literal: 'schema', sql: '"schema"', quoted: true},
72
+ table: {literal: 't', sql: '"t"', quoted: true},
73
+ });
74
+ });
75
+
76
+ test('quoted identifier allows embedded dots', () => {
77
+ expect(parseSnowflakeTableName('"a.b"."c.d"')).toEqual({
78
+ schema: {literal: 'a.b', sql: '"a.b"', quoted: true},
79
+ table: {literal: 'c.d', sql: '"c.d"', quoted: true},
80
+ });
81
+ });
82
+
83
+ test('doubled double-quote is a literal quote', () => {
84
+ expect(parseSnowflakeTableName('"a""b"')).toEqual({
85
+ table: {literal: 'a"b', sql: '"a""b"', quoted: true},
86
+ });
87
+ });
88
+
89
+ test('mixes quoted and bare parts', () => {
90
+ expect(parseSnowflakeTableName('MYDB."mixed"')).toEqual({
91
+ schema: {literal: 'MYDB', sql: 'MYDB', quoted: false},
92
+ table: {literal: 'mixed', sql: '"mixed"', quoted: true},
93
+ });
94
+ });
95
+
96
+ test('tolerates surrounding whitespace and whitespace around dots', () => {
97
+ expect(parseSnowflakeTableName(' sch . t ')).toEqual({
98
+ schema: {literal: 'SCH', sql: 'SCH', quoted: false},
99
+ table: {literal: 'T', sql: 'T', quoted: false},
100
+ });
101
+ });
102
+
103
+ test('returns undefined for empty input', () => {
104
+ expect(parseSnowflakeTableName('')).toBeUndefined();
105
+ });
106
+
107
+ test('returns undefined for four-part name', () => {
108
+ expect(parseSnowflakeTableName('a.b.c.d')).toBeUndefined();
109
+ });
110
+
111
+ test('returns undefined for trailing dot', () => {
112
+ expect(parseSnowflakeTableName('sch.')).toBeUndefined();
113
+ });
114
+
115
+ test('returns undefined for leading dot', () => {
116
+ expect(parseSnowflakeTableName('.t')).toBeUndefined();
117
+ });
118
+
119
+ test('returns undefined for unterminated quoted identifier', () => {
120
+ expect(parseSnowflakeTableName('"oops')).toBeUndefined();
121
+ });
122
+
123
+ test('returns undefined for identifier starting with a digit', () => {
124
+ expect(parseSnowflakeTableName('1foo')).toBeUndefined();
125
+ });
126
+
127
+ test('returns undefined for identifier containing a dash', () => {
128
+ expect(parseSnowflakeTableName('foo-bar')).toBeUndefined();
129
+ });
130
+ });