@malloydata/db-snowflake 0.0.374 → 0.0.376

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -30,7 +30,7 @@ const snowflake_connection_2 = require("./snowflake_connection");
30
30
  (0, malloy_1.registerConnectionType)('snowflake', {
31
31
  displayName: 'Snowflake',
32
32
  factory: async (config) => {
33
- const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, ...props } = config;
33
+ const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, schemaSampleRowLimit, schemaSampleFullScanMaxBytes, ...props } = config;
34
34
  // ConnectionConfig values are trusted to match ConnectionOptions fields
35
35
  // because the property definitions below declare matching names/types.
36
36
  // The double cast bridges Malloy's generic config to snowflake-sdk's
@@ -50,6 +50,16 @@ const snowflake_connection_2 = require("./snowflake_connection");
50
50
  : typeof schemaSampleTimeoutMs === 'string'
51
51
  ? parseInt(schemaSampleTimeoutMs, 10)
52
52
  : undefined,
53
+ schemaSampleRowLimit: typeof schemaSampleRowLimit === 'number'
54
+ ? schemaSampleRowLimit
55
+ : typeof schemaSampleRowLimit === 'string'
56
+ ? parseInt(schemaSampleRowLimit, 10)
57
+ : undefined,
58
+ schemaSampleFullScanMaxBytes: typeof schemaSampleFullScanMaxBytes === 'number'
59
+ ? schemaSampleFullScanMaxBytes
60
+ : typeof schemaSampleFullScanMaxBytes === 'string'
61
+ ? parseInt(schemaSampleFullScanMaxBytes, 10)
62
+ : undefined,
53
63
  });
54
64
  },
55
65
  properties: [
@@ -91,13 +101,30 @@ const snowflake_connection_2 = require("./snowflake_connection");
91
101
  displayName: 'Timeout (ms)',
92
102
  type: 'number',
93
103
  optional: true,
104
+ default: 600000,
94
105
  },
95
106
  {
96
107
  name: 'schemaSampleTimeoutMs',
97
108
  displayName: 'Schema Sample Timeout (ms)',
98
109
  type: 'number',
99
110
  optional: true,
100
- description: 'Timeout for the query that samples variant columns to detect their schema (default 15000)',
111
+ default: 15000,
112
+ description: 'Timeout for the query that samples variant columns to detect their schema.',
113
+ },
114
+ {
115
+ name: 'schemaSampleRowLimit',
116
+ displayName: 'Schema Sample Row Limit',
117
+ type: 'number',
118
+ optional: true,
119
+ default: 1000,
120
+ description: 'Row limit for the variant schema sample. Ignored for tables small enough to full-scan.',
121
+ },
122
+ {
123
+ name: 'schemaSampleFullScanMaxBytes',
124
+ displayName: 'Schema Full-Scan Max Bytes',
125
+ type: 'number',
126
+ optional: true,
127
+ description: 'Tables with BYTES at or below this value are full-scanned during variant schema inference instead of sampled. When unset, the connection uses an internal threshold; picking a value here is a policy choice tied to the size-probe behavior.',
101
128
  },
102
129
  {
103
130
  name: 'setupSQL',
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACvE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EA
AE,IAAI;SACf;QACD;YACE,IAAI,EAAE,uBAAuB;YAC7B,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EACT,2FAA2F;SAC9F;QACD;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,0DAA0D;SACxE;KACF;CACF,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,oBAAoB,EACpB,4BAA4B,EAC5B,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;YACjB,oBAAoB,EAClB,OAAO,oBAAoB,KAAK,QAAQ;gBACtC,CAAC,CAAC,oBAAoB;gBACtB,CAAC,CAAC,OAAO,oBAAoB,KAAK,QAAQ;oBACxC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,EAAE,EAAE,CAAC;oBACpC,CAAC,CAAC,SAAS;YACjB,4BAA4B,EAC1B,OAAO,4BAA4B,KAAK,QAAQ;gBAC9C,CAAC,CAAC,4BAA4B;gBAC9B,CAAC,CAAC,OAAO,4BAA4B,KAAK,QAAQ;oBAChD,CAAC,CAAC,QAAQ,CAAC,4BAA4B,EAAE,EAAE,CAAC;oBAC5C,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC
vE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,MAAM;SAChB;QACD;YACE,IAAI,EAAE,uBAAuB;YAC7B,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,KAAK;YACd,WAAW,EACT,4EAA4E;SAC/E;QACD;YACE,IAAI,EAAE,sBAAsB;YAC5B,WAAW,EAAE,yBAAyB;YACtC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;YACb,WAAW,EACT,wFAAwF;SAC3F;QACD;YACE,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EACT,+OAA+O;SAClP;QACD;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,0DAA0D;SACxE;KACF;CACF,CAAC,CAAC"}
@@ -1,5 +1,4 @@
1
1
  import type { RunSQLOptions, MalloyQueryData, QueryRunStats, Connection, PersistSQLResults, StreamingConnection, PooledConnection, SQLSourceDef, TableSourceDef, QueryRecord, TestableConnection, SQLSourceRequest } from '@malloydata/malloy';
2
- import { TinyParser } from '@malloydata/malloy';
3
2
  import { BaseConnection } from '@malloydata/malloy/connection';
4
3
  import type { ConnectionOptions } from 'snowflake-sdk';
5
4
  import type { Options as PoolOptions } from 'generic-pool';
@@ -7,6 +6,36 @@ type namespace = {
7
6
  database: string;
8
7
  schema: string;
9
8
  };
9
+ /**
10
+ * Output of the INFORMATION_SCHEMA.TABLES probe. Undefined when the
11
+ * probe didn't run (non-parseable name) or couldn't find numeric size
12
+ * info (views, missing permissions).
13
+ */
14
+ export interface TableSizeProbe {
15
+ bytes: number;
16
+ rowCount: number;
17
+ }
18
+ /**
19
+ * Three-way tier that drives variant schema sampling. Extracted as a
20
+ * pure function so cost-policy decisions are unit-testable.
21
+ *
22
+ * full-scan-then-sample: probe confirmed a small base table. One
23
+ * full scan catches rare fields. On failure, fall through to the
24
+ * sample chain rather than accept opaque variant.
25
+ *
26
+ * tablesample-only: probe confirmed a base table above the small
27
+ * threshold. TABLESAMPLE BLOCK is safe (reads a few micro
28
+ * partitions). Plain LIMIT without a WHERE is unsafe on large
29
+ * partitioned tables, so we skip the LIMIT fallback — we'd rather
30
+ * degrade to variant than issue a runaway query.
31
+ *
32
+ * tablesample-then-limit: probe gave no size info (views, temp
33
+ * views, exotic names). We can't distinguish a small view from a
34
+ * view over a petabyte table, so we do best-effort sampling. This
35
+ * is the acknowledged "can't help you" case from the design doc.
36
+ */
37
+ export type SampleStrategy = 'full-scan-then-sample' | 'tablesample-only' | 'tablesample-then-limit';
38
+ export declare function pickSampleStrategy(probe: TableSizeProbe | undefined, fullScanMaxBytes: number): SampleStrategy;
10
39
  export interface SnowflakeConnectionOptions {
11
40
  connOptions?: ConnectionOptions;
12
41
  poolOptions?: PoolOptions;
@@ -14,15 +43,10 @@ export interface SnowflakeConnectionOptions {
14
43
  queryOptions?: RunSQLOptions;
15
44
  timeoutMs?: number;
16
45
  schemaSampleTimeoutMs?: number;
46
+ schemaSampleRowLimit?: number;
47
+ schemaSampleFullScanMaxBytes?: number;
17
48
  setupSQL?: string;
18
49
  }
19
- type PathChain = {
20
- arrayRef: true;
21
- next?: PathChain;
22
- } | {
23
- name: string;
24
- next?: PathChain;
25
- };
26
50
  export declare class SnowflakeConnection extends BaseConnection implements Connection, PooledConnection, PersistSQLResults, StreamingConnection, TestableConnection {
27
51
  readonly name: string;
28
52
  private readonly dialect;
@@ -32,6 +56,8 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
32
56
  private queryOptions;
33
57
  private timeoutMs;
34
58
  private schemaSampleTimeoutMs;
59
+ private schemaSampleRowLimit;
60
+ private schemaSampleFullScanMaxBytes;
35
61
  private setupSQL;
36
62
  constructor(name: string, options?: SnowflakeConnectionOptions);
37
63
  get dialectName(): string;
@@ -48,6 +74,20 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
48
74
  runSQLStream(sqlCommand: string, options?: RunSQLOptions): AsyncIterableIterator<QueryRecord>;
49
75
  test(): Promise<void>;
50
76
  private schemaFromTablePath;
77
+ /**
78
+ * Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
79
+ * and byte size of tablePath. Returns undefined when the name doesn't
80
+ * parse as a two- or three-part identifier (temp views, exotic quoted
81
+ * names), when the probe query fails, or when the row has no numeric
82
+ * BYTES or ROW_COUNT (views and external tables typically report NULL).
83
+ *
84
+ * Two-part `schema.table` names use the current database's
85
+ * INFORMATION_SCHEMA; three-part `db.schema.table` names address
86
+ * INFORMATION_SCHEMA in the named database. Identifier parts are
87
+ * validated against a strict regex before interpolation; values that
88
+ * don't match cause the probe to skip.
89
+ */
90
+ private probeTableSize;
51
91
  /**
52
92
  * Try to run a schema sampling query, with fallback.
53
93
  * First tries the primary query (e.g. using TABLESAMPLE for speed).
@@ -64,9 +104,4 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
64
104
  fetchSelectSchema(sqlRef: SQLSourceRequest): Promise<SQLSourceDef>;
65
105
  manifestTemporaryTable(sqlCommand: string): Promise<string>;
66
106
  }
67
- export declare class PathParser extends TinyParser {
68
- constructor(pathName: string);
69
- getName(): string;
70
- pathChain(): PathChain;
71
- }
72
107
  export {};
@@ -22,131 +22,18 @@
22
22
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
  */
24
24
  Object.defineProperty(exports, "__esModule", { value: true });
25
- exports.PathParser = exports.SnowflakeConnection = void 0;
25
+ exports.SnowflakeConnection = void 0;
26
+ exports.pickSampleStrategy = pickSampleStrategy;
26
27
  const malloy_1 = require("@malloydata/malloy");
27
28
  const connection_1 = require("@malloydata/malloy/connection");
28
29
  const snowflake_executor_1 = require("./snowflake_executor");
29
- class SnowField {
30
- constructor(name, type, dialect) {
31
- this.name = name;
32
- this.type = type;
33
- this.dialect = dialect;
34
- }
35
- fieldDef() {
36
- return {
37
- ...this.dialect.sqlTypeToMalloyType(this.type),
38
- name: this.name,
39
- };
40
- }
41
- walk(_path, _fieldType) {
42
- throw new Error('SNOWWFLAKE SCHEMA PARSE ERROR: Should not walk through fields');
43
- }
44
- static make(name, fieldType, d) {
45
- if (fieldType === 'array') {
46
- return new SnowArray(name, d);
47
- }
48
- else if (fieldType === 'object') {
49
- return new SnowObject(name, d);
50
- }
51
- return new SnowField(name, fieldType, d);
52
- }
53
- }
54
- class SnowObject extends SnowField {
55
- constructor(name, d) {
56
- super(name, 'object', d);
57
- this.fieldMap = new Map();
58
- }
59
- get fields() {
60
- const fields = [];
61
- for (const [_, fieldObj] of this.fieldMap) {
62
- fields.push(fieldObj.fieldDef());
63
- }
64
- return fields;
65
- }
66
- fieldDef() {
67
- const rec = {
68
- type: 'record',
69
- name: this.name,
70
- fields: this.fields,
71
- join: 'one',
72
- };
73
- return rec;
74
- }
75
- walk(path, fieldType) {
76
- if ('name' in path) {
77
- const field = this.fieldMap.get(path.name);
78
- if (path.next) {
79
- if (field instanceof SnowObject || field instanceof SnowArray) {
80
- field.walk(path.next, fieldType);
81
- return;
82
- }
83
- // Field is missing or is a scalar leaf — the variant data has
84
- // inconsistent structure across rows. Degrade to opaque variant.
85
- this.fieldMap.set(path.name, new SnowField(path.name, 'variant', this.dialect));
86
- return;
87
- }
88
- else {
89
- if (!field) {
90
- this.fieldMap.set(path.name, SnowField.make(path.name, fieldType, this.dialect));
91
- return;
92
- }
93
- }
94
- return;
95
- }
96
- // Array reference in an object context — inconsistent structure.
97
- // Ignore this path; the object keeps whatever fields it already has.
98
- }
99
- }
100
- class SnowArray extends SnowField {
101
- constructor(name, d) {
102
- super(name, 'array', d);
103
- this.arrayOf = 'unknown';
104
- }
105
- isArrayOf(type) {
106
- if (this.arrayOf !== 'unknown') {
107
- this.arrayOf = 'variant';
108
- return;
109
- }
110
- this.arrayOf = type;
111
- if (type === 'object') {
112
- this.objectChild = new SnowObject('', this.dialect);
113
- }
114
- else if (type === 'array') {
115
- this.arrayChild = new SnowArray('', this.dialect);
116
- }
117
- }
118
- fieldDef() {
119
- if (this.objectChild) {
120
- const t = (0, malloy_1.mkArrayDef)({ type: 'record', fields: this.objectChild.fields }, this.name);
121
- return t;
122
- }
123
- if (this.arrayChild) {
124
- return (0, malloy_1.mkArrayDef)(this.arrayChild.fieldDef(), this.name);
125
- }
126
- return (0, malloy_1.mkArrayDef)(this.dialect.sqlTypeToMalloyType(this.arrayOf), this.name);
127
- }
128
- walk(path, fieldType) {
129
- if ('arrayRef' in path) {
130
- if (path.next) {
131
- const next = this.arrayChild || this.objectChild;
132
- if (next) {
133
- next.walk(path.next, fieldType);
134
- return;
135
- }
136
- // Array elements were scalars but now we see deeper structure —
137
- // inconsistent variant data. Degrade to variant array.
138
- this.arrayOf = 'variant';
139
- return;
140
- }
141
- else {
142
- this.isArrayOf(fieldType);
143
- return;
144
- }
145
- }
146
- // Name reference in an array context — inconsistent structure.
147
- // Degrade to variant array.
148
- this.arrayOf = 'variant';
149
- }
30
+ const snowflake_variant_schema_1 = require("./snowflake_variant_schema");
31
+ function pickSampleStrategy(probe, fullScanMaxBytes) {
32
+ if (probe === undefined)
33
+ return 'tablesample-then-limit';
34
+ if (probe.bytes <= fullScanMaxBytes)
35
+ return 'full-scan-then-sample';
36
+ return 'tablesample-only';
150
37
  }
151
38
  /**
152
39
  * Default statement timeoutMs value, 10 Mins
@@ -154,7 +41,7 @@ class SnowArray extends SnowField {
154
41
  const TIMEOUT_MS = 1000 * 60 * 10;
155
42
  class SnowflakeConnection extends connection_1.BaseConnection {
156
43
  constructor(name, options) {
157
- var _a, _b, _c;
44
+ var _a, _b, _c, _d, _e;
158
45
  super();
159
46
  this.name = name;
160
47
  this.dialect = new malloy_1.SnowflakeDialect();
@@ -170,6 +57,9 @@ class SnowflakeConnection extends connection_1.BaseConnection {
170
57
  this.queryOptions = (_a = options === null || options === void 0 ? void 0 : options.queryOptions) !== null && _a !== void 0 ? _a : {};
171
58
  this.timeoutMs = (_b = options === null || options === void 0 ? void 0 : options.timeoutMs) !== null && _b !== void 0 ? _b : TIMEOUT_MS;
172
59
  this.schemaSampleTimeoutMs = (_c = options === null || options === void 0 ? void 0 : options.schemaSampleTimeoutMs) !== null && _c !== void 0 ? _c : 15000;
60
+ this.schemaSampleRowLimit = (_d = options === null || options === void 0 ? void 0 : options.schemaSampleRowLimit) !== null && _d !== void 0 ? _d : 1000;
61
+ this.schemaSampleFullScanMaxBytes =
62
+ (_e = options === null || options === void 0 ? void 0 : options.schemaSampleFullScanMaxBytes) !== null && _e !== void 0 ? _e : 100000000;
173
63
  }
174
64
  get dialectName() {
175
65
  return 'snowflake';
@@ -228,18 +118,20 @@ class SnowflakeConnection extends connection_1.BaseConnection {
228
118
  await this.executor.batch('SELECT 1 as one');
229
119
  }
230
120
  async schemaFromTablePath(tablePath, structDef) {
231
- var _a, _b;
121
+ var _a, _b, _c, _d;
232
122
  const infoQuery = `DESCRIBE TABLE ${tablePath}`;
233
123
  const rows = await this.executor.batch(infoQuery);
234
- const variants = [];
124
+ const nestedColumns = [];
235
125
  const notVariant = new Map();
236
126
  for (const row of rows) {
237
127
  // data types look like `VARCHAR(1234)` or `NUMBER(10,2)`
238
128
  const fullType = row['type'].toLocaleLowerCase();
239
129
  const baseType = fullType.split('(')[0];
240
130
  const name = row['name'];
241
- if (['variant', 'array', 'object'].includes(baseType)) {
242
- variants.push(name);
131
+ if (baseType === 'variant' ||
132
+ baseType === 'array' ||
133
+ baseType === 'object') {
134
+ nestedColumns.push({ kind: baseType, name });
243
135
  }
244
136
  else {
245
137
  notVariant.set(name, true);
@@ -253,69 +145,147 @@ class SnowflakeConnection extends connection_1.BaseConnection {
253
145
  }
254
146
  }
255
147
  // VARIANT, ARRAY, and OBJECT columns don't have schema in metadata —
256
- // we have to sample actual data and inspect it to discover the structure.
257
- // This is inherently heuristic (we only look at 100 rows) and can be
258
- // slow on large partitioned tables or expensive views.
259
- if (variants.length > 0) {
260
- const variantArgs = variants.map(v => `'${v}', "${v}"`).join(', ');
261
- // Build the analysis query that flattens sampled rows and detects
262
- // the type of each leaf path. We only construct from variant columns
263
- // (not *) to avoid flattening the entire row on wide tables.
264
- // Paths with multiple types across the sample are dropped (HAVING
265
- // count(*) <= 1), and nulls are ignored.
148
+ // we have to sample actual data and inspect it to discover the
149
+ // structure. Cost control happens in two places:
150
+ // 1. project only the nested columns (via object_construct), so
151
+ // bytes-on-wire are bounded by actual variant content.
152
+ // 2. tier the sampling strategy by probeTableSize (see
153
+ // pickSampleStrategy) — small base tables get a full scan;
154
+ // large base tables get TABLESAMPLE only (no unsafe LIMIT
155
+ // fallback); unknown-size sources (views, temp views) get
156
+ // the best-effort TABLESAMPLE→LIMIT chain.
157
+ if (nestedColumns.length > 0) {
158
+ const variantArgs = nestedColumns
159
+ .map(v => `'${v.name}', "${v.name}"`)
160
+ .join(', ');
161
+ // Flatten sampled rows and emit each distinct (path, type) pair.
162
+ // Conflicting pairs at the same path flow through to mergeShape,
163
+ // which collapses them to variant — that is how we honestly
164
+ // surface mixed-type fields to the user.
266
165
  const makeSampleQuery = (sampleClause) => `
267
- select path, min(type) as type
268
- from (
269
- select
270
- regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
271
- case
272
- when typeof(value) = 'INTEGER' then 'decimal'
273
- when typeof(value) = 'DOUBLE' then 'decimal'
274
- else lower(typeof(value)) end as type
275
- from
276
- (${sampleClause})
277
- ,table(flatten(input => o, recursive => true)) as meta
278
- group by 1,2
279
- )
280
- where type != 'null_value'
281
- group BY 1
282
- having count(*) <=1
283
- order by path;
166
+ select
167
+ regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
168
+ case
169
+ when typeof(value) = 'INTEGER' then 'decimal'
170
+ when typeof(value) = 'DOUBLE' then 'decimal'
171
+ else lower(typeof(value)) end as type
172
+ from
173
+ (${sampleClause})
174
+ ,table(flatten(input => o, recursive => true)) as meta
175
+ where typeof(value) != 'NULL_VALUE'
176
+ group by 1, 2
177
+ order by 1;
284
178
  `;
285
- const limitClause = `select object_construct(${variantArgs}) o` +
286
- ` from ${tablePath} limit 100`;
287
- // Try TABLESAMPLE first — it picks random micro-partitions without
288
- // scanning the whole table, which avoids the full-scan problem on
289
- // large partitioned tables. TABLESAMPLE only works on base tables,
290
- // not views, so if it fails we fall back to a plain LIMIT 100.
291
- const tablesampleClause = `select object_construct(${variantArgs}) o` +
292
- ` from ${tablePath} TABLESAMPLE BLOCK (1) limit 100`;
293
- const fieldPathRows = await this.runSchemaSample(makeSampleQuery(tablesampleClause), makeSampleQuery(limitClause));
179
+ const projectVariants = `select object_construct(${variantArgs}) o`;
180
+ const probe = await this.probeTableSize(tablePath);
181
+ const strategy = pickSampleStrategy(probe, this.schemaSampleFullScanMaxBytes);
182
+ const n = this.schemaSampleRowLimit;
183
+ let fieldPathRows;
184
+ if (strategy === 'full-scan-then-sample') {
185
+ // Small base table: one full scan catches rare fields that
186
+ // sampling would miss. tryBatch so a failure doesn't poison
187
+ // the pool connection (temp views live on it). On failure we
188
+ // fall through to the sample path so a slow or timed-out full
189
+ // scan still gets partial structure.
190
+ fieldPathRows =
191
+ (_a = (await this.executor.tryBatch(makeSampleQuery(`${projectVariants} from ${tablePath}`), {}, this.schemaSampleTimeoutMs))) !== null && _a !== void 0 ? _a : undefined;
192
+ }
294
193
  if (fieldPathRows === undefined) {
295
- // Both attempts failed or timed out treat variants as opaque.
296
- for (const name of variants) {
297
- structDef.fields.push({ type: 'sql native', rawType: 'variant', name });
194
+ const tablesampleQuery = makeSampleQuery(`${projectVariants} from ${tablePath} TABLESAMPLE BLOCK (1) limit ${n}`);
195
+ if (strategy === 'tablesample-only') {
196
+ // Known-large base table: TABLESAMPLE is safe (reads a few
197
+ // micro-partitions), plain LIMIT without a WHERE can be
198
+ // catastrophic on large partitioned tables. If TABLESAMPLE
199
+ // fails here we accept variant rather than risk an unbounded
200
+ // scan.
201
+ fieldPathRows =
202
+ (_b = (await this.executor.tryBatch(tablesampleQuery, {}, this.schemaSampleTimeoutMs))) !== null && _b !== void 0 ? _b : undefined;
203
+ }
204
+ else {
205
+ // Unknown size (view, temp view, non-parseable name) or
206
+ // full-scan fallback: best-effort TABLESAMPLE→LIMIT chain.
207
+ // The LIMIT fallback is the acknowledged "can't help" case
208
+ // for views over large partitioned tables.
209
+ fieldPathRows = await this.runSchemaSample(tablesampleQuery, makeSampleQuery(`${projectVariants} from ${tablePath} limit ${n}`));
298
210
  }
299
211
  }
300
- else {
301
- // Take the schema in list form and convert it into a tree.
302
- const rootObject = new SnowObject('__root__', this.dialect);
212
+ const state = (0, snowflake_variant_schema_1.createVariantSchemaState)();
213
+ // Snowflake nested-schema inference follows these rules:
214
+ // - top-level ARRAY/OBJECT from DESCRIBE are authoritative
215
+ // - descendant paths imply ancestor shape
216
+ // - conflicting shapes degrade only that prefix to variant
217
+ // - every top-level nested column still produces a field
218
+ for (const nestedColumn of nestedColumns) {
219
+ (0, snowflake_variant_schema_1.seedTopLevelShape)(state, nestedColumn);
220
+ }
221
+ if (fieldPathRows !== undefined) {
303
222
  for (const f of fieldPathRows) {
304
- const pathString = (_a = f['PATH']) === null || _a === void 0 ? void 0 : _a.valueOf().toString();
305
- const fieldType = (_b = f['TYPE']) === null || _b === void 0 ? void 0 : _b.valueOf().toString();
223
+ const pathString = (_c = f['PATH']) === null || _c === void 0 ? void 0 : _c.valueOf().toString();
224
+ const fieldType = (_d = f['TYPE']) === null || _d === void 0 ? void 0 : _d.valueOf().toString();
306
225
  if (pathString === undefined || fieldType === undefined)
307
226
  continue;
308
- const pathParser = new PathParser(pathString);
309
- const path = pathParser.pathChain();
310
- if ('name' in path && notVariant.get(path.name)) {
227
+ const pathParser = new snowflake_variant_schema_1.PathParser(pathString);
228
+ const segments = pathParser.segments();
229
+ const topLevel = segments[0];
230
+ if ((topLevel === null || topLevel === void 0 ? void 0 : topLevel.kind) !== 'name' || notVariant.get(topLevel.name)) {
311
231
  continue;
312
232
  }
313
- rootObject.walk(path, fieldType);
233
+ (0, snowflake_variant_schema_1.accumulateVariantPath)(state, segments, fieldType);
314
234
  }
315
- structDef.fields.push(...rootObject.fields);
235
+ }
236
+ // Always emit one field per top-level nested column from DESCRIBE, even
237
+ // if sampling produced no usable descendant paths.
238
+ for (const nestedColumn of nestedColumns) {
239
+ structDef.fields.push((0, snowflake_variant_schema_1.buildTopLevelField)(nestedColumn, state, this.dialect));
316
240
  }
317
241
  }
318
242
  }
243
+ /**
244
+ * Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
245
+ * and byte size of tablePath. Returns undefined when the name doesn't
246
+ * parse as a two- or three-part identifier (temp views, exotic quoted
247
+ * names), when the probe query fails, or when the row has no numeric
248
+ * BYTES or ROW_COUNT (views and external tables typically report NULL).
249
+ *
250
+ * Two-part `schema.table` names use the current database's
251
+ * INFORMATION_SCHEMA; three-part `db.schema.table` names address
252
+ * INFORMATION_SCHEMA in the named database. Identifier parts are
253
+ * validated against a strict regex before interpolation; values that
254
+ * don't match cause the probe to skip.
255
+ */
256
+ async probeTableSize(tablePath) {
257
+ var _a, _b;
258
+ const parts = tablePath.split('.');
259
+ if (parts.length !== 2 && parts.length !== 3)
260
+ return undefined;
261
+ const identifier = /^[A-Za-z_][A-Za-z0-9_$]*$/;
262
+ if (!parts.every(p => identifier.test(p)))
263
+ return undefined;
264
+ const [db, schema, table] = parts.length === 3 ? parts : [undefined, parts[0], parts[1]];
265
+ const dbQualifier = db !== undefined ? `${db}.` : '';
266
+ const rows = await this.executor.tryBatch(`select row_count as rc, bytes as by
267
+ from ${dbQualifier}information_schema.tables
268
+ where upper(table_schema) = upper('${schema}')
269
+ and upper(table_name) = upper('${table}')
270
+ limit 1`, {}, this.schemaSampleTimeoutMs);
271
+ if (!rows || rows.length === 0)
272
+ return undefined;
273
+ const row = rows[0];
274
+ const bytesRaw = (_a = row['BY']) !== null && _a !== void 0 ? _a : row['by'];
275
+ const rowsRaw = (_b = row['RC']) !== null && _b !== void 0 ? _b : row['rc'];
276
+ // Views and external tables surface null BYTES / ROW_COUNT; treat
277
+ // that as "unknown size" so we don't classify them as small and
278
+ // launch a full scan against something potentially huge.
279
+ if (bytesRaw === null || bytesRaw === undefined)
280
+ return undefined;
281
+ if (rowsRaw === null || rowsRaw === undefined)
282
+ return undefined;
283
+ const bytes = Number(bytesRaw);
284
+ const rowCount = Number(rowsRaw);
285
+ if (!Number.isFinite(bytes) || !Number.isFinite(rowCount))
286
+ return undefined;
287
+ return { bytes, rowCount };
288
+ }
319
289
  /**
320
290
  * Try to run a schema sampling query, with fallback.
321
291
  * First tries the primary query (e.g. using TABLESAMPLE for speed).
@@ -373,56 +343,4 @@ class SnowflakeConnection extends connection_1.BaseConnection {
373
343
  }
374
344
  }
375
345
  exports.SnowflakeConnection = SnowflakeConnection;
376
- class PathParser extends malloy_1.TinyParser {
377
- constructor(pathName) {
378
- super(pathName, {
379
- quoted: /^'(\\'|[^'])*'/,
380
- array_of: /^\[\*]/,
381
- char: /^[[.\]]/,
382
- number: /^\d+/,
383
- word: /^\w+/,
384
- });
385
- }
386
- getName() {
387
- const nameStart = this.next();
388
- if (nameStart.type === 'word') {
389
- return nameStart.text;
390
- }
391
- if (nameStart.type === '[') {
392
- const quotedName = this.next('quoted');
393
- this.next(']');
394
- return quotedName.text;
395
- }
396
- throw this.parseError('Expected column name');
397
- }
398
- pathChain() {
399
- const chain = { name: this.getName() };
400
- let node = chain;
401
- for (;;) {
402
- const sep = this.next();
403
- if (sep.type === 'eof') {
404
- return chain;
405
- }
406
- if (sep.type === '.') {
407
- node.next = { name: this.next('word').text };
408
- node = node.next;
409
- }
410
- else if (sep.type === 'array_of') {
411
- node.next = { arrayRef: true };
412
- node = node.next;
413
- }
414
- else if (sep.type === '[') {
415
- // Actually a dot access through a quoted name
416
- const quoted = this.next('quoted');
417
- node.next = { name: quoted.text };
418
- node = node.next;
419
- this.next(']');
420
- }
421
- else {
422
- throw this.parseError(`Unexpected ${sep.type}`);
423
- }
424
- }
425
- }
426
- }
427
- exports.PathParser = PathParser;
428
346
  //# sourceMappingURL=snowflake_connection.js.map