@malloydata/db-snowflake 0.0.375 → 0.0.377

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/dist/index.js +29 -2
  2. package/dist/index.js.map +1 -1
  3. package/dist/snowflake_connection.d.ts +48 -13
  4. package/dist/snowflake_connection.js +144 -228
  5. package/dist/snowflake_connection.js.map +1 -1
  6. package/dist/snowflake_connection.spec.js +179 -14
  7. package/dist/snowflake_connection.spec.js.map +1 -1
  8. package/dist/snowflake_sample_strategy.spec.js +97 -0
  9. package/dist/snowflake_sample_strategy.spec.js.map +1 -0
  10. package/dist/snowflake_table_name.d.ts +19 -0
  11. package/dist/snowflake_table_name.js +80 -0
  12. package/dist/snowflake_table_name.js.map +1 -0
  13. package/dist/snowflake_variant_schema.d.ts +43 -0
  14. package/dist/snowflake_variant_schema.js +203 -0
  15. package/dist/snowflake_variant_schema.js.map +1 -0
  16. package/dist/snowflake_variant_schema.spec.js +150 -0
  17. package/dist/snowflake_variant_schema.spec.js.map +1 -0
  18. package/package.json +2 -2
  19. package/src/index.ts +34 -1
  20. package/src/snowflake_connection.spec.ts +219 -15
  21. package/src/snowflake_connection.ts +218 -262
  22. package/src/snowflake_sample_strategy.spec.ts +130 -0
  23. package/src/snowflake_table_name.ts +94 -0
  24. package/src/snowflake_variant_schema.spec.ts +188 -0
  25. package/src/snowflake_variant_schema.ts +301 -0
  26. package/dist/snowflake_executor.spec.js +0 -89
  27. package/dist/snowflake_executor.spec.js.map +0 -1
  28. package/dist/snowflake_setup.spec.js +0 -76
  29. package/dist/snowflake_setup.spec.js.map +0 -1
  30. package/src/snowflake_executor.spec.ts +0 -103
  31. package/src/snowflake_setup.spec.ts +0 -56
  32. /package/dist/{snowflake_executor.spec.d.ts → snowflake_sample_strategy.spec.d.ts} +0 -0
  33. /package/dist/{snowflake_setup.spec.d.ts → snowflake_variant_schema.spec.d.ts} +0 -0
package/dist/index.js CHANGED
@@ -30,7 +30,7 @@ const snowflake_connection_2 = require("./snowflake_connection");
30
30
  (0, malloy_1.registerConnectionType)('snowflake', {
31
31
  displayName: 'Snowflake',
32
32
  factory: async (config) => {
33
- const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, ...props } = config;
33
+ const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, schemaSampleRowLimit, schemaSampleFullScanMaxBytes, ...props } = config;
34
34
  // ConnectionConfig values are trusted to match ConnectionOptions fields
35
35
  // because the property definitions below declare matching names/types.
36
36
  // The double cast bridges Malloy's generic config to snowflake-sdk's
@@ -50,6 +50,16 @@ const snowflake_connection_2 = require("./snowflake_connection");
50
50
  : typeof schemaSampleTimeoutMs === 'string'
51
51
  ? parseInt(schemaSampleTimeoutMs, 10)
52
52
  : undefined,
53
+ schemaSampleRowLimit: typeof schemaSampleRowLimit === 'number'
54
+ ? schemaSampleRowLimit
55
+ : typeof schemaSampleRowLimit === 'string'
56
+ ? parseInt(schemaSampleRowLimit, 10)
57
+ : undefined,
58
+ schemaSampleFullScanMaxBytes: typeof schemaSampleFullScanMaxBytes === 'number'
59
+ ? schemaSampleFullScanMaxBytes
60
+ : typeof schemaSampleFullScanMaxBytes === 'string'
61
+ ? parseInt(schemaSampleFullScanMaxBytes, 10)
62
+ : undefined,
53
63
  });
54
64
  },
55
65
  properties: [
@@ -91,13 +101,30 @@ const snowflake_connection_2 = require("./snowflake_connection");
91
101
  displayName: 'Timeout (ms)',
92
102
  type: 'number',
93
103
  optional: true,
104
+ default: 600000,
94
105
  },
95
106
  {
96
107
  name: 'schemaSampleTimeoutMs',
97
108
  displayName: 'Schema Sample Timeout (ms)',
98
109
  type: 'number',
99
110
  optional: true,
100
- description: 'Timeout for the query that samples variant columns to detect their schema (default 15000)',
111
+ default: 15000,
112
+ description: 'Timeout for the query that samples variant columns to detect their schema.',
113
+ },
114
+ {
115
+ name: 'schemaSampleRowLimit',
116
+ displayName: 'Schema Sample Row Limit',
117
+ type: 'number',
118
+ optional: true,
119
+ default: 1000,
120
+ description: 'Row limit for the variant schema sample. Ignored for tables small enough to full-scan.',
121
+ },
122
+ {
123
+ name: 'schemaSampleFullScanMaxBytes',
124
+ displayName: 'Schema Full-Scan Max Bytes',
125
+ type: 'number',
126
+ optional: true,
127
+ description: 'Tables with BYTES at or below this value are full-scanned during variant schema inference instead of sampled. When unset, the connection uses an internal threshold; picking a value here is a policy choice tied to the size-probe behavior.',
101
128
  },
102
129
  {
103
130
  name: 'setupSQL',
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACvE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EA
AE,IAAI;SACf;QACD;YACE,IAAI,EAAE,uBAAuB;YAC7B,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EACT,2FAA2F;SAC9F;QACD;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,0DAA0D;SACxE;KACF;CACF,CAAC,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,oBAAoB,EACpB,4BAA4B,EAC5B,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;YACjB,oBAAoB,EAClB,OAAO,oBAAoB,KAAK,QAAQ;gBACtC,CAAC,CAAC,oBAAoB;gBACtB,CAAC,CAAC,OAAO,oBAAoB,KAAK,QAAQ;oBACxC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,EAAE,EAAE,CAAC;oBACpC,CAAC,CAAC,SAAS;YACjB,4BAA4B,EAC1B,OAAO,4BAA4B,KAAK,QAAQ;gBAC9C,CAAC,CAAC,4BAA4B;gBAC9B,CAAC,CAAC,OAAO,4BAA4B,KAAK,QAAQ;oBAChD,CAAC,CAAC,QAAQ,CAAC,4BAA4B,EAAE,EAAE,CAAC;oBAC5C,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC
vE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,MAAM;SAChB;QACD;YACE,IAAI,EAAE,uBAAuB;YAC7B,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,KAAK;YACd,WAAW,EACT,4EAA4E;SAC/E;QACD;YACE,IAAI,EAAE,sBAAsB;YAC5B,WAAW,EAAE,yBAAyB;YACtC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;YACb,WAAW,EACT,wFAAwF;SAC3F;QACD;YACE,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EACT,+OAA+O;SAClP;QACD;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,0DAA0D;SACxE;KACF;CACF,CAAC,CAAC"}
@@ -1,5 +1,4 @@
1
1
  import type { RunSQLOptions, MalloyQueryData, QueryRunStats, Connection, PersistSQLResults, StreamingConnection, PooledConnection, SQLSourceDef, TableSourceDef, QueryRecord, TestableConnection, SQLSourceRequest } from '@malloydata/malloy';
2
- import { TinyParser } from '@malloydata/malloy';
3
2
  import { BaseConnection } from '@malloydata/malloy/connection';
4
3
  import type { ConnectionOptions } from 'snowflake-sdk';
5
4
  import type { Options as PoolOptions } from 'generic-pool';
@@ -7,6 +6,36 @@ type namespace = {
7
6
  database: string;
8
7
  schema: string;
9
8
  };
9
+ /**
10
+ * Output of the INFORMATION_SCHEMA.TABLES probe. Undefined when the
11
+ * probe didn't run (non-parseable name) or couldn't find numeric size
12
+ * info (views, missing permissions).
13
+ */
14
+ export interface TableSizeProbe {
15
+ bytes: number;
16
+ rowCount: number;
17
+ }
18
+ /**
19
+ * Three-way tier that drives variant schema sampling. Extracted as a
20
+ * pure function so cost-policy decisions are unit-testable.
21
+ *
22
+ * full-scan-then-sample: probe confirmed a small base table. One
23
+ * full scan catches rare fields. On failure, fall through to the
24
+ * sample chain rather than accept opaque variant.
25
+ *
26
+ * tablesample-only: probe confirmed a base table above the small
27
+ * threshold. TABLESAMPLE BLOCK is safe (reads a few micro
28
+ * partitions). Plain LIMIT without a WHERE is unsafe on large
29
+ * partitioned tables, so we skip the LIMIT fallback — we'd rather
30
+ * degrade to variant than issue a runaway query.
31
+ *
32
+ * tablesample-then-limit: probe gave no size info (views, temp
33
+ * views, exotic names). We can't distinguish a small view from a
34
+ * view over a petabyte table, so we do best-effort sampling. This
35
+ * is the acknowledged "can't help you" case from the design doc.
36
+ */
37
+ export type SampleStrategy = 'full-scan-then-sample' | 'tablesample-only' | 'tablesample-then-limit';
38
+ export declare function pickSampleStrategy(probe: TableSizeProbe | undefined, fullScanMaxBytes: number): SampleStrategy;
10
39
  export interface SnowflakeConnectionOptions {
11
40
  connOptions?: ConnectionOptions;
12
41
  poolOptions?: PoolOptions;
@@ -14,15 +43,10 @@ export interface SnowflakeConnectionOptions {
14
43
  queryOptions?: RunSQLOptions;
15
44
  timeoutMs?: number;
16
45
  schemaSampleTimeoutMs?: number;
46
+ schemaSampleRowLimit?: number;
47
+ schemaSampleFullScanMaxBytes?: number;
17
48
  setupSQL?: string;
18
49
  }
19
- type PathChain = {
20
- arrayRef: true;
21
- next?: PathChain;
22
- } | {
23
- name: string;
24
- next?: PathChain;
25
- };
26
50
  export declare class SnowflakeConnection extends BaseConnection implements Connection, PooledConnection, PersistSQLResults, StreamingConnection, TestableConnection {
27
51
  readonly name: string;
28
52
  private readonly dialect;
@@ -32,6 +56,8 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
32
56
  private queryOptions;
33
57
  private timeoutMs;
34
58
  private schemaSampleTimeoutMs;
59
+ private schemaSampleRowLimit;
60
+ private schemaSampleFullScanMaxBytes;
35
61
  private setupSQL;
36
62
  constructor(name: string, options?: SnowflakeConnectionOptions);
37
63
  get dialectName(): string;
@@ -48,6 +74,20 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
48
74
  runSQLStream(sqlCommand: string, options?: RunSQLOptions): AsyncIterableIterator<QueryRecord>;
49
75
  test(): Promise<void>;
50
76
  private schemaFromTablePath;
77
+ /**
78
+ * Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
79
+ * and byte size of tablePath. Returns undefined when the name doesn't
80
+ * parse as a two- or three-part identifier, when the probe query fails,
81
+ * or when the row has no numeric BYTES (views and external tables
82
+ * typically report NULL).
83
+ *
84
+ * Two-part `schema.table` names use the current database's
85
+ * INFORMATION_SCHEMA; three-part `db.schema.table` names address
86
+ * INFORMATION_SCHEMA in the named database. Identifiers are parsed
87
+ * with Snowflake's quoting rules so bare parts case-fold to upper and
88
+ * quoted parts are compared verbatim against the catalog.
89
+ */
90
+ private probeTableSize;
51
91
  /**
52
92
  * Try to run a schema sampling query, with fallback.
53
93
  * First tries the primary query (e.g. using TABLESAMPLE for speed).
@@ -64,9 +104,4 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
64
104
  fetchSelectSchema(sqlRef: SQLSourceRequest): Promise<SQLSourceDef>;
65
105
  manifestTemporaryTable(sqlCommand: string): Promise<string>;
66
106
  }
67
- export declare class PathParser extends TinyParser {
68
- constructor(pathName: string);
69
- getName(): string;
70
- pathChain(): PathChain;
71
- }
72
107
  export {};
@@ -22,131 +22,19 @@
22
22
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
  */
24
24
  Object.defineProperty(exports, "__esModule", { value: true });
25
- exports.PathParser = exports.SnowflakeConnection = void 0;
25
+ exports.SnowflakeConnection = void 0;
26
+ exports.pickSampleStrategy = pickSampleStrategy;
26
27
  const malloy_1 = require("@malloydata/malloy");
27
28
  const connection_1 = require("@malloydata/malloy/connection");
28
29
  const snowflake_executor_1 = require("./snowflake_executor");
29
- class SnowField {
30
- constructor(name, type, dialect) {
31
- this.name = name;
32
- this.type = type;
33
- this.dialect = dialect;
34
- }
35
- fieldDef() {
36
- return {
37
- ...this.dialect.sqlTypeToMalloyType(this.type),
38
- name: this.name,
39
- };
40
- }
41
- walk(_path, _fieldType) {
42
- throw new Error('SNOWWFLAKE SCHEMA PARSE ERROR: Should not walk through fields');
43
- }
44
- static make(name, fieldType, d) {
45
- if (fieldType === 'array') {
46
- return new SnowArray(name, d);
47
- }
48
- else if (fieldType === 'object') {
49
- return new SnowObject(name, d);
50
- }
51
- return new SnowField(name, fieldType, d);
52
- }
53
- }
54
- class SnowObject extends SnowField {
55
- constructor(name, d) {
56
- super(name, 'object', d);
57
- this.fieldMap = new Map();
58
- }
59
- get fields() {
60
- const fields = [];
61
- for (const [_, fieldObj] of this.fieldMap) {
62
- fields.push(fieldObj.fieldDef());
63
- }
64
- return fields;
65
- }
66
- fieldDef() {
67
- const rec = {
68
- type: 'record',
69
- name: this.name,
70
- fields: this.fields,
71
- join: 'one',
72
- };
73
- return rec;
74
- }
75
- walk(path, fieldType) {
76
- if ('name' in path) {
77
- const field = this.fieldMap.get(path.name);
78
- if (path.next) {
79
- if (field instanceof SnowObject || field instanceof SnowArray) {
80
- field.walk(path.next, fieldType);
81
- return;
82
- }
83
- // Field is missing or is a scalar leaf — the variant data has
84
- // inconsistent structure across rows. Degrade to opaque variant.
85
- this.fieldMap.set(path.name, new SnowField(path.name, 'variant', this.dialect));
86
- return;
87
- }
88
- else {
89
- if (!field) {
90
- this.fieldMap.set(path.name, SnowField.make(path.name, fieldType, this.dialect));
91
- return;
92
- }
93
- }
94
- return;
95
- }
96
- // Array reference in an object context — inconsistent structure.
97
- // Ignore this path; the object keeps whatever fields it already has.
98
- }
99
- }
100
- class SnowArray extends SnowField {
101
- constructor(name, d) {
102
- super(name, 'array', d);
103
- this.arrayOf = 'unknown';
104
- }
105
- isArrayOf(type) {
106
- if (this.arrayOf !== 'unknown') {
107
- this.arrayOf = 'variant';
108
- return;
109
- }
110
- this.arrayOf = type;
111
- if (type === 'object') {
112
- this.objectChild = new SnowObject('', this.dialect);
113
- }
114
- else if (type === 'array') {
115
- this.arrayChild = new SnowArray('', this.dialect);
116
- }
117
- }
118
- fieldDef() {
119
- if (this.objectChild) {
120
- const t = (0, malloy_1.mkArrayDef)({ type: 'record', fields: this.objectChild.fields }, this.name);
121
- return t;
122
- }
123
- if (this.arrayChild) {
124
- return (0, malloy_1.mkArrayDef)(this.arrayChild.fieldDef(), this.name);
125
- }
126
- return (0, malloy_1.mkArrayDef)(this.dialect.sqlTypeToMalloyType(this.arrayOf), this.name);
127
- }
128
- walk(path, fieldType) {
129
- if ('arrayRef' in path) {
130
- if (path.next) {
131
- const next = this.arrayChild || this.objectChild;
132
- if (next) {
133
- next.walk(path.next, fieldType);
134
- return;
135
- }
136
- // Array elements were scalars but now we see deeper structure —
137
- // inconsistent variant data. Degrade to variant array.
138
- this.arrayOf = 'variant';
139
- return;
140
- }
141
- else {
142
- this.isArrayOf(fieldType);
143
- return;
144
- }
145
- }
146
- // Name reference in an array context — inconsistent structure.
147
- // Degrade to variant array.
148
- this.arrayOf = 'variant';
149
- }
30
+ const snowflake_variant_schema_1 = require("./snowflake_variant_schema");
31
+ const snowflake_table_name_1 = require("./snowflake_table_name");
32
+ function pickSampleStrategy(probe, fullScanMaxBytes) {
33
+ if (probe === undefined)
34
+ return 'tablesample-then-limit';
35
+ if (probe.bytes <= fullScanMaxBytes)
36
+ return 'full-scan-then-sample';
37
+ return 'tablesample-only';
150
38
  }
151
39
  /**
152
40
  * Default statement timeoutMs value, 10 Mins
@@ -154,7 +42,7 @@ class SnowArray extends SnowField {
154
42
  const TIMEOUT_MS = 1000 * 60 * 10;
155
43
  class SnowflakeConnection extends connection_1.BaseConnection {
156
44
  constructor(name, options) {
157
- var _a, _b, _c;
45
+ var _a, _b, _c, _d, _e;
158
46
  super();
159
47
  this.name = name;
160
48
  this.dialect = new malloy_1.SnowflakeDialect();
@@ -170,6 +58,9 @@ class SnowflakeConnection extends connection_1.BaseConnection {
170
58
  this.queryOptions = (_a = options === null || options === void 0 ? void 0 : options.queryOptions) !== null && _a !== void 0 ? _a : {};
171
59
  this.timeoutMs = (_b = options === null || options === void 0 ? void 0 : options.timeoutMs) !== null && _b !== void 0 ? _b : TIMEOUT_MS;
172
60
  this.schemaSampleTimeoutMs = (_c = options === null || options === void 0 ? void 0 : options.schemaSampleTimeoutMs) !== null && _c !== void 0 ? _c : 15000;
61
+ this.schemaSampleRowLimit = (_d = options === null || options === void 0 ? void 0 : options.schemaSampleRowLimit) !== null && _d !== void 0 ? _d : 1000;
62
+ this.schemaSampleFullScanMaxBytes =
63
+ (_e = options === null || options === void 0 ? void 0 : options.schemaSampleFullScanMaxBytes) !== null && _e !== void 0 ? _e : 100000000;
173
64
  }
174
65
  get dialectName() {
175
66
  return 'snowflake';
@@ -228,18 +119,20 @@ class SnowflakeConnection extends connection_1.BaseConnection {
228
119
  await this.executor.batch('SELECT 1 as one');
229
120
  }
230
121
  async schemaFromTablePath(tablePath, structDef) {
231
- var _a, _b;
122
+ var _a, _b, _c, _d;
232
123
  const infoQuery = `DESCRIBE TABLE ${tablePath}`;
233
124
  const rows = await this.executor.batch(infoQuery);
234
- const variants = [];
125
+ const nestedColumns = [];
235
126
  const notVariant = new Map();
236
127
  for (const row of rows) {
237
128
  // data types look like `VARCHAR(1234)` or `NUMBER(10,2)`
238
129
  const fullType = row['type'].toLocaleLowerCase();
239
130
  const baseType = fullType.split('(')[0];
240
131
  const name = row['name'];
241
- if (['variant', 'array', 'object'].includes(baseType)) {
242
- variants.push(name);
132
+ if (baseType === 'variant' ||
133
+ baseType === 'array' ||
134
+ baseType === 'object') {
135
+ nestedColumns.push({ kind: baseType, name });
243
136
  }
244
137
  else {
245
138
  notVariant.set(name, true);
@@ -253,69 +146,144 @@ class SnowflakeConnection extends connection_1.BaseConnection {
253
146
  }
254
147
  }
255
148
  // VARIANT, ARRAY, and OBJECT columns don't have schema in metadata —
256
- // we have to sample actual data and inspect it to discover the structure.
257
- // This is inherently heuristic (we only look at 100 rows) and can be
258
- // slow on large partitioned tables or expensive views.
259
- if (variants.length > 0) {
260
- const variantArgs = variants.map(v => `'${v}', "${v}"`).join(', ');
261
- // Build the analysis query that flattens sampled rows and detects
262
- // the type of each leaf path. We only construct from variant columns
263
- // (not *) to avoid flattening the entire row on wide tables.
264
- // Paths with multiple types across the sample are dropped (HAVING
265
- // count(*) <= 1), and nulls are ignored.
149
+ // we have to sample actual data and inspect it to discover the
150
+ // structure. Cost control happens in two places:
151
+ // 1. project only the nested columns (via object_construct), so
152
+ // bytes-on-wire are bounded by actual variant content.
153
+ // 2. tier the sampling strategy by probeTableSize (see
154
+ // pickSampleStrategy) — small base tables get a full scan;
155
+ // large base tables get TABLESAMPLE only (no unsafe LIMIT
156
+ // fallback); unknown-size sources (views, temp views) get
157
+ // the best-effort TABLESAMPLE→LIMIT chain.
158
+ if (nestedColumns.length > 0) {
159
+ const variantArgs = nestedColumns
160
+ .map(v => `'${v.name}', "${v.name}"`)
161
+ .join(', ');
162
+ // Flatten sampled rows and emit each distinct (path, type) pair.
163
+ // Conflicting pairs at the same path flow through to mergeShape,
164
+ // which collapses them to variant — that is how we honestly
165
+ // surface mixed-type fields to the user.
266
166
  const makeSampleQuery = (sampleClause) => `
267
- select path, min(type) as type
268
- from (
269
- select
270
- regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
271
- case
272
- when typeof(value) = 'INTEGER' then 'decimal'
273
- when typeof(value) = 'DOUBLE' then 'decimal'
274
- else lower(typeof(value)) end as type
275
- from
276
- (${sampleClause})
277
- ,table(flatten(input => o, recursive => true)) as meta
278
- group by 1,2
279
- )
280
- where type != 'null_value'
281
- group BY 1
282
- having count(*) <=1
283
- order by path;
167
+ select
168
+ regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
169
+ case
170
+ when typeof(value) = 'INTEGER' then 'decimal'
171
+ when typeof(value) = 'DOUBLE' then 'decimal'
172
+ else lower(typeof(value)) end as type
173
+ from
174
+ (${sampleClause})
175
+ ,table(flatten(input => o, recursive => true)) as meta
176
+ where typeof(value) != 'NULL_VALUE'
177
+ group by 1, 2
178
+ order by 1;
284
179
  `;
285
- const limitClause = `select object_construct(${variantArgs}) o` +
286
- ` from ${tablePath} limit 100`;
287
- // Try TABLESAMPLE first — it picks random micro-partitions without
288
- // scanning the whole table, which avoids the full-scan problem on
289
- // large partitioned tables. TABLESAMPLE only works on base tables,
290
- // not views, so if it fails we fall back to a plain LIMIT 100.
291
- const tablesampleClause = `select object_construct(${variantArgs}) o` +
292
- ` from ${tablePath} TABLESAMPLE BLOCK (1) limit 100`;
293
- const fieldPathRows = await this.runSchemaSample(makeSampleQuery(tablesampleClause), makeSampleQuery(limitClause));
180
+ const projectVariants = `select object_construct(${variantArgs}) o`;
181
+ const probe = await this.probeTableSize(tablePath);
182
+ const strategy = pickSampleStrategy(probe, this.schemaSampleFullScanMaxBytes);
183
+ const n = this.schemaSampleRowLimit;
184
+ let fieldPathRows;
185
+ if (strategy === 'full-scan-then-sample') {
186
+ // Small base table: one full scan catches rare fields that
187
+ // sampling would miss. tryBatch so a failure doesn't poison
188
+ // the pool connection (temp views live on it). On failure we
189
+ // fall through to the sample path so a slow or timed-out full
190
+ // scan still gets partial structure.
191
+ fieldPathRows =
192
+ (_a = (await this.executor.tryBatch(makeSampleQuery(`${projectVariants} from ${tablePath}`), {}, this.schemaSampleTimeoutMs))) !== null && _a !== void 0 ? _a : undefined;
193
+ }
294
194
  if (fieldPathRows === undefined) {
295
- // Both attempts failed or timed out — treat variants as opaque.
296
- for (const name of variants) {
297
- structDef.fields.push({ type: 'sql native', rawType: 'variant', name });
195
+ const tablesampleQuery = makeSampleQuery(`${projectVariants} from ${tablePath} TABLESAMPLE BLOCK (1) limit ${n}`);
196
+ if (strategy === 'tablesample-only') {
197
+ // Known-large base table: TABLESAMPLE is safe (reads a few
198
+ // micro-partitions), plain LIMIT without a WHERE can be
199
+ // catastrophic on large partitioned tables. If TABLESAMPLE
200
+ // fails here we accept variant rather than risk an unbounded
201
+ // scan.
202
+ fieldPathRows =
203
+ (_b = (await this.executor.tryBatch(tablesampleQuery, {}, this.schemaSampleTimeoutMs))) !== null && _b !== void 0 ? _b : undefined;
204
+ }
205
+ else {
206
+ // Unknown size (view, temp view, non-parseable name) or
207
+ // full-scan fallback: best-effort TABLESAMPLE→LIMIT chain.
208
+ // The LIMIT fallback is the acknowledged "can't help" case
209
+ // for views over large partitioned tables.
210
+ fieldPathRows = await this.runSchemaSample(tablesampleQuery, makeSampleQuery(`${projectVariants} from ${tablePath} limit ${n}`));
298
211
  }
299
212
  }
300
- else {
301
- // Take the schema in list form and convert it into a tree.
302
- const rootObject = new SnowObject('__root__', this.dialect);
213
+ const state = (0, snowflake_variant_schema_1.createVariantSchemaState)();
214
+ // Snowflake nested-schema inference follows these rules:
215
+ // - top-level ARRAY/OBJECT from DESCRIBE are authoritative
216
+ // - descendant paths imply ancestor shape
217
+ // - conflicting shapes degrade only that prefix to variant
218
+ // - every top-level nested column still produces a field
219
+ for (const nestedColumn of nestedColumns) {
220
+ (0, snowflake_variant_schema_1.seedTopLevelShape)(state, nestedColumn);
221
+ }
222
+ if (fieldPathRows !== undefined) {
303
223
  for (const f of fieldPathRows) {
304
- const pathString = (_a = f['PATH']) === null || _a === void 0 ? void 0 : _a.valueOf().toString();
305
- const fieldType = (_b = f['TYPE']) === null || _b === void 0 ? void 0 : _b.valueOf().toString();
224
+ const pathString = (_c = f['PATH']) === null || _c === void 0 ? void 0 : _c.valueOf().toString();
225
+ const fieldType = (_d = f['TYPE']) === null || _d === void 0 ? void 0 : _d.valueOf().toString();
306
226
  if (pathString === undefined || fieldType === undefined)
307
227
  continue;
308
- const pathParser = new PathParser(pathString);
309
- const path = pathParser.pathChain();
310
- if ('name' in path && notVariant.get(path.name)) {
228
+ const pathParser = new snowflake_variant_schema_1.PathParser(pathString);
229
+ const segments = pathParser.segments();
230
+ const topLevel = segments[0];
231
+ if ((topLevel === null || topLevel === void 0 ? void 0 : topLevel.kind) !== 'name' || notVariant.get(topLevel.name)) {
311
232
  continue;
312
233
  }
313
- rootObject.walk(path, fieldType);
234
+ (0, snowflake_variant_schema_1.accumulateVariantPath)(state, segments, fieldType);
314
235
  }
315
- structDef.fields.push(...rootObject.fields);
236
+ }
237
+ // Always emit one field per top-level nested column from DESCRIBE, even
238
+ // if sampling produced no usable descendant paths.
239
+ for (const nestedColumn of nestedColumns) {
240
+ structDef.fields.push((0, snowflake_variant_schema_1.buildTopLevelField)(nestedColumn, state, this.dialect));
316
241
  }
317
242
  }
318
243
  }
244
+ /**
245
+ * Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
246
+ * and byte size of tablePath. Returns undefined when the name doesn't
247
+ * parse as a two- or three-part identifier, when the probe query fails,
248
+ * or when the row has no numeric BYTES (views and external tables
249
+ * typically report NULL).
250
+ *
251
+ * Two-part `schema.table` names use the current database's
252
+ * INFORMATION_SCHEMA; three-part `db.schema.table` names address
253
+ * INFORMATION_SCHEMA in the named database. Identifiers are parsed
254
+ * with Snowflake's quoting rules so bare parts case-fold to upper and
255
+ * quoted parts are compared verbatim against the catalog.
256
+ */
257
+ async probeTableSize(tablePath) {
258
+ var _a, _b;
259
+ const parsed = (0, snowflake_table_name_1.parseSnowflakeTableName)(tablePath);
260
+ if (parsed === undefined || parsed.schema === undefined)
261
+ return undefined;
262
+ const quoteLit = (s) => s.replace(/'/g, "''");
263
+ const dbQualifier = parsed.database ? `${parsed.database.sql}.` : '';
264
+ const rows = await this.executor.tryBatch(`select row_count as rc, bytes as by
265
+ from ${dbQualifier}information_schema.tables
266
+ where table_schema = '${quoteLit(parsed.schema.literal)}'
267
+ and table_name = '${quoteLit(parsed.table.literal)}'
268
+ limit 1`, {}, this.schemaSampleTimeoutMs);
269
+ if (!rows || rows.length === 0)
270
+ return undefined;
271
+ const row = rows[0];
272
+ const bytesRaw = (_a = row['BY']) !== null && _a !== void 0 ? _a : row['by'];
273
+ const rowsRaw = (_b = row['RC']) !== null && _b !== void 0 ? _b : row['rc'];
274
+ // Views and external tables surface null BYTES / ROW_COUNT; treat
275
+ // that as "unknown size" so we don't classify them as small and
276
+ // launch a full scan against something potentially huge.
277
+ if (bytesRaw === null || bytesRaw === undefined)
278
+ return undefined;
279
+ if (rowsRaw === null || rowsRaw === undefined)
280
+ return undefined;
281
+ const bytes = Number(bytesRaw);
282
+ const rowCount = Number(rowsRaw);
283
+ if (!Number.isFinite(bytes) || !Number.isFinite(rowCount))
284
+ return undefined;
285
+ return { bytes, rowCount };
286
+ }
319
287
  /**
320
288
  * Try to run a schema sampling query, with fallback.
321
289
  * First tries the primary query (e.g. using TABLESAMPLE for speed).
@@ -373,56 +341,4 @@ class SnowflakeConnection extends connection_1.BaseConnection {
373
341
  }
374
342
  }
375
343
  exports.SnowflakeConnection = SnowflakeConnection;
376
- class PathParser extends malloy_1.TinyParser {
377
- constructor(pathName) {
378
- super(pathName, {
379
- quoted: /^'(\\'|[^'])*'/,
380
- array_of: /^\[\*]/,
381
- char: /^[[.\]]/,
382
- number: /^\d+/,
383
- word: /^\w+/,
384
- });
385
- }
386
- getName() {
387
- const nameStart = this.next();
388
- if (nameStart.type === 'word') {
389
- return nameStart.text;
390
- }
391
- if (nameStart.type === '[') {
392
- const quotedName = this.next('quoted');
393
- this.next(']');
394
- return quotedName.text;
395
- }
396
- throw this.parseError('Expected column name');
397
- }
398
- pathChain() {
399
- const chain = { name: this.getName() };
400
- let node = chain;
401
- for (;;) {
402
- const sep = this.next();
403
- if (sep.type === 'eof') {
404
- return chain;
405
- }
406
- if (sep.type === '.') {
407
- node.next = { name: this.next('word').text };
408
- node = node.next;
409
- }
410
- else if (sep.type === 'array_of') {
411
- node.next = { arrayRef: true };
412
- node = node.next;
413
- }
414
- else if (sep.type === '[') {
415
- // Actually a dot access through a quoted name
416
- const quoted = this.next('quoted');
417
- node.next = { name: quoted.text };
418
- node = node.next;
419
- this.next(']');
420
- }
421
- else {
422
- throw this.parseError(`Unexpected ${sep.type}`);
423
- }
424
- }
425
- }
426
- }
427
- exports.PathParser = PathParser;
428
344
  //# sourceMappingURL=snowflake_connection.js.map