@malloydata/db-snowflake 0.0.374 → 0.0.376
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +29 -2
- package/dist/index.js.map +1 -1
- package/dist/snowflake_connection.d.ts +48 -13
- package/dist/snowflake_connection.js +146 -228
- package/dist/snowflake_connection.js.map +1 -1
- package/dist/snowflake_connection.spec.js +84 -14
- package/dist/snowflake_connection.spec.js.map +1 -1
- package/dist/snowflake_sample_strategy.spec.d.ts +1 -0
- package/dist/snowflake_sample_strategy.spec.js +25 -0
- package/dist/snowflake_sample_strategy.spec.js.map +1 -0
- package/dist/snowflake_variant_schema.d.ts +43 -0
- package/dist/snowflake_variant_schema.js +203 -0
- package/dist/snowflake_variant_schema.js.map +1 -0
- package/dist/snowflake_variant_schema.spec.d.ts +1 -0
- package/dist/snowflake_variant_schema.spec.js +150 -0
- package/dist/snowflake_variant_schema.spec.js.map +1 -0
- package/package.json +2 -2
- package/src/index.ts +34 -1
- package/src/snowflake_connection.spec.ts +88 -14
- package/src/snowflake_connection.ts +220 -262
- package/src/snowflake_sample_strategy.spec.ts +43 -0
- package/src/snowflake_variant_schema.spec.ts +188 -0
- package/src/snowflake_variant_schema.ts +301 -0
package/dist/index.js
CHANGED
|
@@ -30,7 +30,7 @@ const snowflake_connection_2 = require("./snowflake_connection");
|
|
|
30
30
|
(0, malloy_1.registerConnectionType)('snowflake', {
|
|
31
31
|
displayName: 'Snowflake',
|
|
32
32
|
factory: async (config) => {
|
|
33
|
-
const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, ...props } = config;
|
|
33
|
+
const { name, is: _, setupSQL, timeoutMs, schemaSampleTimeoutMs, schemaSampleRowLimit, schemaSampleFullScanMaxBytes, ...props } = config;
|
|
34
34
|
// ConnectionConfig values are trusted to match ConnectionOptions fields
|
|
35
35
|
// because the property definitions below declare matching names/types.
|
|
36
36
|
// The double cast bridges Malloy's generic config to snowflake-sdk's
|
|
@@ -50,6 +50,16 @@ const snowflake_connection_2 = require("./snowflake_connection");
|
|
|
50
50
|
: typeof schemaSampleTimeoutMs === 'string'
|
|
51
51
|
? parseInt(schemaSampleTimeoutMs, 10)
|
|
52
52
|
: undefined,
|
|
53
|
+
schemaSampleRowLimit: typeof schemaSampleRowLimit === 'number'
|
|
54
|
+
? schemaSampleRowLimit
|
|
55
|
+
: typeof schemaSampleRowLimit === 'string'
|
|
56
|
+
? parseInt(schemaSampleRowLimit, 10)
|
|
57
|
+
: undefined,
|
|
58
|
+
schemaSampleFullScanMaxBytes: typeof schemaSampleFullScanMaxBytes === 'number'
|
|
59
|
+
? schemaSampleFullScanMaxBytes
|
|
60
|
+
: typeof schemaSampleFullScanMaxBytes === 'string'
|
|
61
|
+
? parseInt(schemaSampleFullScanMaxBytes, 10)
|
|
62
|
+
: undefined,
|
|
53
63
|
});
|
|
54
64
|
},
|
|
55
65
|
properties: [
|
|
@@ -91,13 +101,30 @@ const snowflake_connection_2 = require("./snowflake_connection");
|
|
|
91
101
|
displayName: 'Timeout (ms)',
|
|
92
102
|
type: 'number',
|
|
93
103
|
optional: true,
|
|
104
|
+
default: 600000,
|
|
94
105
|
},
|
|
95
106
|
{
|
|
96
107
|
name: 'schemaSampleTimeoutMs',
|
|
97
108
|
displayName: 'Schema Sample Timeout (ms)',
|
|
98
109
|
type: 'number',
|
|
99
110
|
optional: true,
|
|
100
|
-
|
|
111
|
+
default: 15000,
|
|
112
|
+
description: 'Timeout for the query that samples variant columns to detect their schema.',
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
name: 'schemaSampleRowLimit',
|
|
116
|
+
displayName: 'Schema Sample Row Limit',
|
|
117
|
+
type: 'number',
|
|
118
|
+
optional: true,
|
|
119
|
+
default: 1000,
|
|
120
|
+
description: 'Row limit for the variant schema sample. Ignored for tables small enough to full-scan.',
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
name: 'schemaSampleFullScanMaxBytes',
|
|
124
|
+
displayName: 'Schema Full-Scan Max Bytes',
|
|
125
|
+
type: 'number',
|
|
126
|
+
optional: true,
|
|
127
|
+
description: 'Tables with BYTES at or below this value are full-scanned during variant schema inference instead of sampled. When unset, the connection uses an internal threshold; picking a value here is a policy choice tied to the size-probe behavior.',
|
|
101
128
|
},
|
|
102
129
|
{
|
|
103
130
|
name: 'setupSQL',
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACvE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;AAEH,+DAA2D;AAAnD,2HAAA,mBAAmB,OAAA;AAE3B,+CAA0D;AAG1D,iEAA2D;AAE3D,IAAA,+BAAsB,EAAC,WAAW,EAAE;IAClC,WAAW,EAAE,WAAW;IACxB,OAAO,EAAE,KAAK,EAAE,MAAwB,EAAE,EAAE;QAC1C,MAAM,EACJ,IAAI,EACJ,EAAE,EAAE,CAAC,EACL,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,oBAAoB,EACpB,4BAA4B,EAC5B,GAAG,KAAK,EACT,GAAG,MAAM,CAAC;QACX,wEAAwE;QACxE,uEAAuE;QACvE,qEAAqE;QACrE,mEAAmE;QACnE,2BAA2B;QAC3B,MAAM,WAAW,GAAG,KAAqC,CAAC;QAC1D,OAAO,IAAI,0CAAmB,CAAC,IAAI,EAAE;YACnC,WAAW;YACX,QAAQ,EAAE,OAAO,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAC7D,SAAS,EACP,OAAO,SAAS,KAAK,QAAQ;gBAC3B,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,OAAO,SAAS,KAAK,QAAQ;oBAC7B,CAAC,CAAC,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC;oBACzB,CAAC,CAAC,SAAS;YACjB,qBAAqB,EACnB,OAAO,qBAAqB,KAAK,QAAQ;gBACvC,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,OAAO,qBAAqB,KAAK,QAAQ;oBACzC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,EAAE,EAAE,CAAC;oBACrC,CAAC,CAAC,SAAS;YACjB,oBAAoB,EAClB,OAAO,oBAAoB,KAAK,QAAQ;gBACtC,CAAC,CAAC,oBAAoB;gBACtB,CAAC,CAAC,OAAO,oBAAoB,KAAK,QAAQ;oBACxC,CAAC,CAAC,QAAQ,CAAC,oBAAoB,EAAE,EAAE,CAAC;oBACpC,CAAC,CAAC,SAAS;YACjB,4BAA4B,EAC1B,OAAO,4BAA4B,KAAK,QAAQ;gBAC9C,CAAC,CAAC,4BAA4B;gBAC9B,CAAC,CAAC,OAAO,4BAA4B,KAAK,QAAQ;oBAChD,CAAC,CAAC,QAAQ,CAAC,4BAA4B,EAAE,EAAE,CAAC;oBAC5C,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IACD,UAAU,EAAE;QACV,EAAC,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,SAAS,EAAE,IAAI,EAAE,QAAQ,EAAC;QACzD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,UAAU;YACvB,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACnE;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;SACf;QACD,EAAC,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QAC3E,EAAC,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAC;QACvE;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,kBAAkB;YAC/B,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE;gBACX,mBAAmB,EAAE,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC;gBAC1C,WAAW,EAAE,CAAC,GAAG,CAAC;aACnB;SACF;QACD;YACE,IAAI,EAAE,gBAAgB;YACtB,WAAW,EAAE,wBAAwB;YACrC,IAAI,EAAE,UAAU;YAChB,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,WAAW;YACjB,WAAW,EAAE,cAAc;YAC3B,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,MAAM;SAChB;QACD;YACE,IAAI,EAAE,uBAAuB;YAC7B,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,KAAK;YACd,WAAW,EACT,4EAA4E;SAC/E;QACD;YACE,IAAI,EAAE,sBAAsB;YAC5B,WAAW,EAAE,yBAAyB;YACtC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;YACb,WAAW,EACT,wFAAwF;SAC3F;QACD;YACE,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,4BAA4B;YACzC,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,IAAI;YACd,WAAW,EACT,+OAA+O;SAClP;QACD;YACE,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,WAAW;YACxB,IAAI,EAAE,MAAM;YACZ,QAAQ,EAAE,IAAI;YACd,WAAW,EAAE,0DAA0D;SACxE;KACF;CACF,CAAC,CAAC"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import type { RunSQLOptions, MalloyQueryData, QueryRunStats, Connection, PersistSQLResults, StreamingConnection, PooledConnection, SQLSourceDef, TableSourceDef, QueryRecord, TestableConnection, SQLSourceRequest } from '@malloydata/malloy';
|
|
2
|
-
import { TinyParser } from '@malloydata/malloy';
|
|
3
2
|
import { BaseConnection } from '@malloydata/malloy/connection';
|
|
4
3
|
import type { ConnectionOptions } from 'snowflake-sdk';
|
|
5
4
|
import type { Options as PoolOptions } from 'generic-pool';
|
|
@@ -7,6 +6,36 @@ type namespace = {
|
|
|
7
6
|
database: string;
|
|
8
7
|
schema: string;
|
|
9
8
|
};
|
|
9
|
+
/**
|
|
10
|
+
* Output of the INFORMATION_SCHEMA.TABLES probe. Undefined when the
|
|
11
|
+
* probe didn't run (non-parseable name) or couldn't find numeric size
|
|
12
|
+
* info (views, missing permissions).
|
|
13
|
+
*/
|
|
14
|
+
export interface TableSizeProbe {
|
|
15
|
+
bytes: number;
|
|
16
|
+
rowCount: number;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Three-way tier that drives variant schema sampling. Extracted as a
|
|
20
|
+
* pure function so cost-policy decisions are unit-testable.
|
|
21
|
+
*
|
|
22
|
+
* full-scan-then-sample: probe confirmed a small base table. One
|
|
23
|
+
* full scan catches rare fields. On failure, fall through to the
|
|
24
|
+
* sample chain rather than accept opaque variant.
|
|
25
|
+
*
|
|
26
|
+
* tablesample-only: probe confirmed a base table above the small
|
|
27
|
+
* threshold. TABLESAMPLE BLOCK is safe (reads a few micro
|
|
28
|
+
* partitions). Plain LIMIT without a WHERE is unsafe on large
|
|
29
|
+
* partitioned tables, so we skip the LIMIT fallback — we'd rather
|
|
30
|
+
* degrade to variant than issue a runaway query.
|
|
31
|
+
*
|
|
32
|
+
* tablesample-then-limit: probe gave no size info (views, temp
|
|
33
|
+
* views, exotic names). We can't distinguish a small view from a
|
|
34
|
+
* view over a petabyte table, so we do best-effort sampling. This
|
|
35
|
+
* is the acknowledged "can't help you" case from the design doc.
|
|
36
|
+
*/
|
|
37
|
+
export type SampleStrategy = 'full-scan-then-sample' | 'tablesample-only' | 'tablesample-then-limit';
|
|
38
|
+
export declare function pickSampleStrategy(probe: TableSizeProbe | undefined, fullScanMaxBytes: number): SampleStrategy;
|
|
10
39
|
export interface SnowflakeConnectionOptions {
|
|
11
40
|
connOptions?: ConnectionOptions;
|
|
12
41
|
poolOptions?: PoolOptions;
|
|
@@ -14,15 +43,10 @@ export interface SnowflakeConnectionOptions {
|
|
|
14
43
|
queryOptions?: RunSQLOptions;
|
|
15
44
|
timeoutMs?: number;
|
|
16
45
|
schemaSampleTimeoutMs?: number;
|
|
46
|
+
schemaSampleRowLimit?: number;
|
|
47
|
+
schemaSampleFullScanMaxBytes?: number;
|
|
17
48
|
setupSQL?: string;
|
|
18
49
|
}
|
|
19
|
-
type PathChain = {
|
|
20
|
-
arrayRef: true;
|
|
21
|
-
next?: PathChain;
|
|
22
|
-
} | {
|
|
23
|
-
name: string;
|
|
24
|
-
next?: PathChain;
|
|
25
|
-
};
|
|
26
50
|
export declare class SnowflakeConnection extends BaseConnection implements Connection, PooledConnection, PersistSQLResults, StreamingConnection, TestableConnection {
|
|
27
51
|
readonly name: string;
|
|
28
52
|
private readonly dialect;
|
|
@@ -32,6 +56,8 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
|
|
|
32
56
|
private queryOptions;
|
|
33
57
|
private timeoutMs;
|
|
34
58
|
private schemaSampleTimeoutMs;
|
|
59
|
+
private schemaSampleRowLimit;
|
|
60
|
+
private schemaSampleFullScanMaxBytes;
|
|
35
61
|
private setupSQL;
|
|
36
62
|
constructor(name: string, options?: SnowflakeConnectionOptions);
|
|
37
63
|
get dialectName(): string;
|
|
@@ -48,6 +74,20 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
|
|
|
48
74
|
runSQLStream(sqlCommand: string, options?: RunSQLOptions): AsyncIterableIterator<QueryRecord>;
|
|
49
75
|
test(): Promise<void>;
|
|
50
76
|
private schemaFromTablePath;
|
|
77
|
+
/**
|
|
78
|
+
* Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
|
|
79
|
+
* and byte size of tablePath. Returns undefined when the name doesn't
|
|
80
|
+
* parse as a two- or three-part identifier (temp views, exotic quoted
|
|
81
|
+
* names), when the probe query fails, or when the row has no numeric
|
|
82
|
+
* BYTES (views and external tables typically report NULL).
|
|
83
|
+
*
|
|
84
|
+
* Two-part `schema.table` names use the current database's
|
|
85
|
+
* INFORMATION_SCHEMA; three-part `db.schema.table` names address
|
|
86
|
+
* INFORMATION_SCHEMA in the named database. Identifier parts are
|
|
87
|
+
* validated against a strict regex before interpolation; values that
|
|
88
|
+
* don't match cause the probe to skip.
|
|
89
|
+
*/
|
|
90
|
+
private probeTableSize;
|
|
51
91
|
/**
|
|
52
92
|
* Try to run a schema sampling query, with fallback.
|
|
53
93
|
* First tries the primary query (e.g. using TABLESAMPLE for speed).
|
|
@@ -64,9 +104,4 @@ export declare class SnowflakeConnection extends BaseConnection implements Conne
|
|
|
64
104
|
fetchSelectSchema(sqlRef: SQLSourceRequest): Promise<SQLSourceDef>;
|
|
65
105
|
manifestTemporaryTable(sqlCommand: string): Promise<string>;
|
|
66
106
|
}
|
|
67
|
-
export declare class PathParser extends TinyParser {
|
|
68
|
-
constructor(pathName: string);
|
|
69
|
-
getName(): string;
|
|
70
|
-
pathChain(): PathChain;
|
|
71
|
-
}
|
|
72
107
|
export {};
|
|
@@ -22,131 +22,18 @@
|
|
|
22
22
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
23
23
|
*/
|
|
24
24
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
25
|
-
exports.
|
|
25
|
+
exports.SnowflakeConnection = void 0;
|
|
26
|
+
exports.pickSampleStrategy = pickSampleStrategy;
|
|
26
27
|
const malloy_1 = require("@malloydata/malloy");
|
|
27
28
|
const connection_1 = require("@malloydata/malloy/connection");
|
|
28
29
|
const snowflake_executor_1 = require("./snowflake_executor");
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
return {
|
|
37
|
-
...this.dialect.sqlTypeToMalloyType(this.type),
|
|
38
|
-
name: this.name,
|
|
39
|
-
};
|
|
40
|
-
}
|
|
41
|
-
walk(_path, _fieldType) {
|
|
42
|
-
throw new Error('SNOWWFLAKE SCHEMA PARSE ERROR: Should not walk through fields');
|
|
43
|
-
}
|
|
44
|
-
static make(name, fieldType, d) {
|
|
45
|
-
if (fieldType === 'array') {
|
|
46
|
-
return new SnowArray(name, d);
|
|
47
|
-
}
|
|
48
|
-
else if (fieldType === 'object') {
|
|
49
|
-
return new SnowObject(name, d);
|
|
50
|
-
}
|
|
51
|
-
return new SnowField(name, fieldType, d);
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
class SnowObject extends SnowField {
|
|
55
|
-
constructor(name, d) {
|
|
56
|
-
super(name, 'object', d);
|
|
57
|
-
this.fieldMap = new Map();
|
|
58
|
-
}
|
|
59
|
-
get fields() {
|
|
60
|
-
const fields = [];
|
|
61
|
-
for (const [_, fieldObj] of this.fieldMap) {
|
|
62
|
-
fields.push(fieldObj.fieldDef());
|
|
63
|
-
}
|
|
64
|
-
return fields;
|
|
65
|
-
}
|
|
66
|
-
fieldDef() {
|
|
67
|
-
const rec = {
|
|
68
|
-
type: 'record',
|
|
69
|
-
name: this.name,
|
|
70
|
-
fields: this.fields,
|
|
71
|
-
join: 'one',
|
|
72
|
-
};
|
|
73
|
-
return rec;
|
|
74
|
-
}
|
|
75
|
-
walk(path, fieldType) {
|
|
76
|
-
if ('name' in path) {
|
|
77
|
-
const field = this.fieldMap.get(path.name);
|
|
78
|
-
if (path.next) {
|
|
79
|
-
if (field instanceof SnowObject || field instanceof SnowArray) {
|
|
80
|
-
field.walk(path.next, fieldType);
|
|
81
|
-
return;
|
|
82
|
-
}
|
|
83
|
-
// Field is missing or is a scalar leaf — the variant data has
|
|
84
|
-
// inconsistent structure across rows. Degrade to opaque variant.
|
|
85
|
-
this.fieldMap.set(path.name, new SnowField(path.name, 'variant', this.dialect));
|
|
86
|
-
return;
|
|
87
|
-
}
|
|
88
|
-
else {
|
|
89
|
-
if (!field) {
|
|
90
|
-
this.fieldMap.set(path.name, SnowField.make(path.name, fieldType, this.dialect));
|
|
91
|
-
return;
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
// Array reference in an object context — inconsistent structure.
|
|
97
|
-
// Ignore this path; the object keeps whatever fields it already has.
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
class SnowArray extends SnowField {
|
|
101
|
-
constructor(name, d) {
|
|
102
|
-
super(name, 'array', d);
|
|
103
|
-
this.arrayOf = 'unknown';
|
|
104
|
-
}
|
|
105
|
-
isArrayOf(type) {
|
|
106
|
-
if (this.arrayOf !== 'unknown') {
|
|
107
|
-
this.arrayOf = 'variant';
|
|
108
|
-
return;
|
|
109
|
-
}
|
|
110
|
-
this.arrayOf = type;
|
|
111
|
-
if (type === 'object') {
|
|
112
|
-
this.objectChild = new SnowObject('', this.dialect);
|
|
113
|
-
}
|
|
114
|
-
else if (type === 'array') {
|
|
115
|
-
this.arrayChild = new SnowArray('', this.dialect);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
fieldDef() {
|
|
119
|
-
if (this.objectChild) {
|
|
120
|
-
const t = (0, malloy_1.mkArrayDef)({ type: 'record', fields: this.objectChild.fields }, this.name);
|
|
121
|
-
return t;
|
|
122
|
-
}
|
|
123
|
-
if (this.arrayChild) {
|
|
124
|
-
return (0, malloy_1.mkArrayDef)(this.arrayChild.fieldDef(), this.name);
|
|
125
|
-
}
|
|
126
|
-
return (0, malloy_1.mkArrayDef)(this.dialect.sqlTypeToMalloyType(this.arrayOf), this.name);
|
|
127
|
-
}
|
|
128
|
-
walk(path, fieldType) {
|
|
129
|
-
if ('arrayRef' in path) {
|
|
130
|
-
if (path.next) {
|
|
131
|
-
const next = this.arrayChild || this.objectChild;
|
|
132
|
-
if (next) {
|
|
133
|
-
next.walk(path.next, fieldType);
|
|
134
|
-
return;
|
|
135
|
-
}
|
|
136
|
-
// Array elements were scalars but now we see deeper structure —
|
|
137
|
-
// inconsistent variant data. Degrade to variant array.
|
|
138
|
-
this.arrayOf = 'variant';
|
|
139
|
-
return;
|
|
140
|
-
}
|
|
141
|
-
else {
|
|
142
|
-
this.isArrayOf(fieldType);
|
|
143
|
-
return;
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
// Name reference in an array context — inconsistent structure.
|
|
147
|
-
// Degrade to variant array.
|
|
148
|
-
this.arrayOf = 'variant';
|
|
149
|
-
}
|
|
30
|
+
const snowflake_variant_schema_1 = require("./snowflake_variant_schema");
|
|
31
|
+
function pickSampleStrategy(probe, fullScanMaxBytes) {
|
|
32
|
+
if (probe === undefined)
|
|
33
|
+
return 'tablesample-then-limit';
|
|
34
|
+
if (probe.bytes <= fullScanMaxBytes)
|
|
35
|
+
return 'full-scan-then-sample';
|
|
36
|
+
return 'tablesample-only';
|
|
150
37
|
}
|
|
151
38
|
/**
|
|
152
39
|
* Default statement timeoutMs value, 10 Mins
|
|
@@ -154,7 +41,7 @@ class SnowArray extends SnowField {
|
|
|
154
41
|
const TIMEOUT_MS = 1000 * 60 * 10;
|
|
155
42
|
class SnowflakeConnection extends connection_1.BaseConnection {
|
|
156
43
|
constructor(name, options) {
|
|
157
|
-
var _a, _b, _c;
|
|
44
|
+
var _a, _b, _c, _d, _e;
|
|
158
45
|
super();
|
|
159
46
|
this.name = name;
|
|
160
47
|
this.dialect = new malloy_1.SnowflakeDialect();
|
|
@@ -170,6 +57,9 @@ class SnowflakeConnection extends connection_1.BaseConnection {
|
|
|
170
57
|
this.queryOptions = (_a = options === null || options === void 0 ? void 0 : options.queryOptions) !== null && _a !== void 0 ? _a : {};
|
|
171
58
|
this.timeoutMs = (_b = options === null || options === void 0 ? void 0 : options.timeoutMs) !== null && _b !== void 0 ? _b : TIMEOUT_MS;
|
|
172
59
|
this.schemaSampleTimeoutMs = (_c = options === null || options === void 0 ? void 0 : options.schemaSampleTimeoutMs) !== null && _c !== void 0 ? _c : 15000;
|
|
60
|
+
this.schemaSampleRowLimit = (_d = options === null || options === void 0 ? void 0 : options.schemaSampleRowLimit) !== null && _d !== void 0 ? _d : 1000;
|
|
61
|
+
this.schemaSampleFullScanMaxBytes =
|
|
62
|
+
(_e = options === null || options === void 0 ? void 0 : options.schemaSampleFullScanMaxBytes) !== null && _e !== void 0 ? _e : 100000000;
|
|
173
63
|
}
|
|
174
64
|
get dialectName() {
|
|
175
65
|
return 'snowflake';
|
|
@@ -228,18 +118,20 @@ class SnowflakeConnection extends connection_1.BaseConnection {
|
|
|
228
118
|
await this.executor.batch('SELECT 1 as one');
|
|
229
119
|
}
|
|
230
120
|
async schemaFromTablePath(tablePath, structDef) {
|
|
231
|
-
var _a, _b;
|
|
121
|
+
var _a, _b, _c, _d;
|
|
232
122
|
const infoQuery = `DESCRIBE TABLE ${tablePath}`;
|
|
233
123
|
const rows = await this.executor.batch(infoQuery);
|
|
234
|
-
const
|
|
124
|
+
const nestedColumns = [];
|
|
235
125
|
const notVariant = new Map();
|
|
236
126
|
for (const row of rows) {
|
|
237
127
|
// data types look like `VARCHAR(1234)` or `NUMBER(10,2)`
|
|
238
128
|
const fullType = row['type'].toLocaleLowerCase();
|
|
239
129
|
const baseType = fullType.split('(')[0];
|
|
240
130
|
const name = row['name'];
|
|
241
|
-
if (
|
|
242
|
-
|
|
131
|
+
if (baseType === 'variant' ||
|
|
132
|
+
baseType === 'array' ||
|
|
133
|
+
baseType === 'object') {
|
|
134
|
+
nestedColumns.push({ kind: baseType, name });
|
|
243
135
|
}
|
|
244
136
|
else {
|
|
245
137
|
notVariant.set(name, true);
|
|
@@ -253,69 +145,147 @@ class SnowflakeConnection extends connection_1.BaseConnection {
|
|
|
253
145
|
}
|
|
254
146
|
}
|
|
255
147
|
// VARIANT, ARRAY, and OBJECT columns don't have schema in metadata —
|
|
256
|
-
// we have to sample actual data and inspect it to discover the
|
|
257
|
-
//
|
|
258
|
-
//
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
148
|
+
// we have to sample actual data and inspect it to discover the
|
|
149
|
+
// structure. Cost control happens in two places:
|
|
150
|
+
// 1. project only the nested columns (via object_construct), so
|
|
151
|
+
// bytes-on-wire are bounded by actual variant content.
|
|
152
|
+
// 2. tier the sampling strategy by probeTableSize (see
|
|
153
|
+
// pickSampleStrategy) — small base tables get a full scan;
|
|
154
|
+
// large base tables get TABLESAMPLE only (no unsafe LIMIT
|
|
155
|
+
// fallback); unknown-size sources (views, temp views) get
|
|
156
|
+
// the best-effort TABLESAMPLE→LIMIT chain.
|
|
157
|
+
if (nestedColumns.length > 0) {
|
|
158
|
+
const variantArgs = nestedColumns
|
|
159
|
+
.map(v => `'${v.name}', "${v.name}"`)
|
|
160
|
+
.join(', ');
|
|
161
|
+
// Flatten sampled rows and emit each distinct (path, type) pair.
|
|
162
|
+
// Conflicting pairs at the same path flow through to mergeShape,
|
|
163
|
+
// which collapses them to variant — that is how we honestly
|
|
164
|
+
// surface mixed-type fields to the user.
|
|
266
165
|
const makeSampleQuery = (sampleClause) => `
|
|
267
|
-
select
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
)
|
|
280
|
-
where type != 'null_value'
|
|
281
|
-
group BY 1
|
|
282
|
-
having count(*) <=1
|
|
283
|
-
order by path;
|
|
166
|
+
select
|
|
167
|
+
regexp_replace(path, '\\\\[[0-9]+\\\\]', '[*]') as path,
|
|
168
|
+
case
|
|
169
|
+
when typeof(value) = 'INTEGER' then 'decimal'
|
|
170
|
+
when typeof(value) = 'DOUBLE' then 'decimal'
|
|
171
|
+
else lower(typeof(value)) end as type
|
|
172
|
+
from
|
|
173
|
+
(${sampleClause})
|
|
174
|
+
,table(flatten(input => o, recursive => true)) as meta
|
|
175
|
+
where typeof(value) != 'NULL_VALUE'
|
|
176
|
+
group by 1, 2
|
|
177
|
+
order by 1;
|
|
284
178
|
`;
|
|
285
|
-
const
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
179
|
+
const projectVariants = `select object_construct(${variantArgs}) o`;
|
|
180
|
+
const probe = await this.probeTableSize(tablePath);
|
|
181
|
+
const strategy = pickSampleStrategy(probe, this.schemaSampleFullScanMaxBytes);
|
|
182
|
+
const n = this.schemaSampleRowLimit;
|
|
183
|
+
let fieldPathRows;
|
|
184
|
+
if (strategy === 'full-scan-then-sample') {
|
|
185
|
+
// Small base table: one full scan catches rare fields that
|
|
186
|
+
// sampling would miss. tryBatch so a failure doesn't poison
|
|
187
|
+
// the pool connection (temp views live on it). On failure we
|
|
188
|
+
// fall through to the sample path so a slow or timed-out full
|
|
189
|
+
// scan still gets partial structure.
|
|
190
|
+
fieldPathRows =
|
|
191
|
+
(_a = (await this.executor.tryBatch(makeSampleQuery(`${projectVariants} from ${tablePath}`), {}, this.schemaSampleTimeoutMs))) !== null && _a !== void 0 ? _a : undefined;
|
|
192
|
+
}
|
|
294
193
|
if (fieldPathRows === undefined) {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
194
|
+
const tablesampleQuery = makeSampleQuery(`${projectVariants} from ${tablePath} TABLESAMPLE BLOCK (1) limit ${n}`);
|
|
195
|
+
if (strategy === 'tablesample-only') {
|
|
196
|
+
// Known-large base table: TABLESAMPLE is safe (reads a few
|
|
197
|
+
// micro-partitions), plain LIMIT without a WHERE can be
|
|
198
|
+
// catastrophic on large partitioned tables. If TABLESAMPLE
|
|
199
|
+
// fails here we accept variant rather than risk an unbounded
|
|
200
|
+
// scan.
|
|
201
|
+
fieldPathRows =
|
|
202
|
+
(_b = (await this.executor.tryBatch(tablesampleQuery, {}, this.schemaSampleTimeoutMs))) !== null && _b !== void 0 ? _b : undefined;
|
|
203
|
+
}
|
|
204
|
+
else {
|
|
205
|
+
// Unknown size (view, temp view, non-parseable name) or
|
|
206
|
+
// full-scan fallback: best-effort TABLESAMPLE→LIMIT chain.
|
|
207
|
+
// The LIMIT fallback is the acknowledged "can't help" case
|
|
208
|
+
// for views over large partitioned tables.
|
|
209
|
+
fieldPathRows = await this.runSchemaSample(tablesampleQuery, makeSampleQuery(`${projectVariants} from ${tablePath} limit ${n}`));
|
|
298
210
|
}
|
|
299
211
|
}
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
212
|
+
const state = (0, snowflake_variant_schema_1.createVariantSchemaState)();
|
|
213
|
+
// Snowflake nested-schema inference follows these rules:
|
|
214
|
+
// - top-level ARRAY/OBJECT from DESCRIBE are authoritative
|
|
215
|
+
// - descendant paths imply ancestor shape
|
|
216
|
+
// - conflicting shapes degrade only that prefix to variant
|
|
217
|
+
// - every top-level nested column still produces a field
|
|
218
|
+
for (const nestedColumn of nestedColumns) {
|
|
219
|
+
(0, snowflake_variant_schema_1.seedTopLevelShape)(state, nestedColumn);
|
|
220
|
+
}
|
|
221
|
+
if (fieldPathRows !== undefined) {
|
|
303
222
|
for (const f of fieldPathRows) {
|
|
304
|
-
const pathString = (
|
|
305
|
-
const fieldType = (
|
|
223
|
+
const pathString = (_c = f['PATH']) === null || _c === void 0 ? void 0 : _c.valueOf().toString();
|
|
224
|
+
const fieldType = (_d = f['TYPE']) === null || _d === void 0 ? void 0 : _d.valueOf().toString();
|
|
306
225
|
if (pathString === undefined || fieldType === undefined)
|
|
307
226
|
continue;
|
|
308
|
-
const pathParser = new PathParser(pathString);
|
|
309
|
-
const
|
|
310
|
-
|
|
227
|
+
const pathParser = new snowflake_variant_schema_1.PathParser(pathString);
|
|
228
|
+
const segments = pathParser.segments();
|
|
229
|
+
const topLevel = segments[0];
|
|
230
|
+
if ((topLevel === null || topLevel === void 0 ? void 0 : topLevel.kind) !== 'name' || notVariant.get(topLevel.name)) {
|
|
311
231
|
continue;
|
|
312
232
|
}
|
|
313
|
-
|
|
233
|
+
(0, snowflake_variant_schema_1.accumulateVariantPath)(state, segments, fieldType);
|
|
314
234
|
}
|
|
315
|
-
|
|
235
|
+
}
|
|
236
|
+
// Always emit one field per top-level nested column from DESCRIBE, even
|
|
237
|
+
// if sampling produced no usable descendant paths.
|
|
238
|
+
for (const nestedColumn of nestedColumns) {
|
|
239
|
+
structDef.fields.push((0, snowflake_variant_schema_1.buildTopLevelField)(nestedColumn, state, this.dialect));
|
|
316
240
|
}
|
|
317
241
|
}
|
|
318
242
|
}
|
|
243
|
+
/**
|
|
244
|
+
* Cheap metadata probe: ask INFORMATION_SCHEMA.TABLES for the row count
|
|
245
|
+
* and byte size of tablePath. Returns undefined when the name doesn't
|
|
246
|
+
* parse as a two- or three-part identifier (temp views, exotic quoted
|
|
247
|
+
* names), when the probe query fails, or when the row has no numeric
|
|
248
|
+
* BYTES (views and external tables typically report NULL).
|
|
249
|
+
*
|
|
250
|
+
* Two-part `schema.table` names use the current database's
|
|
251
|
+
* INFORMATION_SCHEMA; three-part `db.schema.table` names address
|
|
252
|
+
* INFORMATION_SCHEMA in the named database. Identifier parts are
|
|
253
|
+
* validated against a strict regex before interpolation; values that
|
|
254
|
+
* don't match cause the probe to skip.
|
|
255
|
+
*/
|
|
256
|
+
async probeTableSize(tablePath) {
|
|
257
|
+
var _a, _b;
|
|
258
|
+
const parts = tablePath.split('.');
|
|
259
|
+
if (parts.length !== 2 && parts.length !== 3)
|
|
260
|
+
return undefined;
|
|
261
|
+
const identifier = /^[A-Za-z_][A-Za-z0-9_$]*$/;
|
|
262
|
+
if (!parts.every(p => identifier.test(p)))
|
|
263
|
+
return undefined;
|
|
264
|
+
const [db, schema, table] = parts.length === 3 ? parts : [undefined, parts[0], parts[1]];
|
|
265
|
+
const dbQualifier = db !== undefined ? `${db}.` : '';
|
|
266
|
+
const rows = await this.executor.tryBatch(`select row_count as rc, bytes as by
|
|
267
|
+
from ${dbQualifier}information_schema.tables
|
|
268
|
+
where upper(table_schema) = upper('${schema}')
|
|
269
|
+
and upper(table_name) = upper('${table}')
|
|
270
|
+
limit 1`, {}, this.schemaSampleTimeoutMs);
|
|
271
|
+
if (!rows || rows.length === 0)
|
|
272
|
+
return undefined;
|
|
273
|
+
const row = rows[0];
|
|
274
|
+
const bytesRaw = (_a = row['BY']) !== null && _a !== void 0 ? _a : row['by'];
|
|
275
|
+
const rowsRaw = (_b = row['RC']) !== null && _b !== void 0 ? _b : row['rc'];
|
|
276
|
+
// Views and external tables surface null BYTES / ROW_COUNT; treat
|
|
277
|
+
// that as "unknown size" so we don't classify them as small and
|
|
278
|
+
// launch a full scan against something potentially huge.
|
|
279
|
+
if (bytesRaw === null || bytesRaw === undefined)
|
|
280
|
+
return undefined;
|
|
281
|
+
if (rowsRaw === null || rowsRaw === undefined)
|
|
282
|
+
return undefined;
|
|
283
|
+
const bytes = Number(bytesRaw);
|
|
284
|
+
const rowCount = Number(rowsRaw);
|
|
285
|
+
if (!Number.isFinite(bytes) || !Number.isFinite(rowCount))
|
|
286
|
+
return undefined;
|
|
287
|
+
return { bytes, rowCount };
|
|
288
|
+
}
|
|
319
289
|
/**
|
|
320
290
|
* Try to run a schema sampling query, with fallback.
|
|
321
291
|
* First tries the primary query (e.g. using TABLESAMPLE for speed).
|
|
@@ -373,56 +343,4 @@ class SnowflakeConnection extends connection_1.BaseConnection {
|
|
|
373
343
|
}
|
|
374
344
|
}
|
|
375
345
|
exports.SnowflakeConnection = SnowflakeConnection;
|
|
376
|
-
class PathParser extends malloy_1.TinyParser {
|
|
377
|
-
constructor(pathName) {
|
|
378
|
-
super(pathName, {
|
|
379
|
-
quoted: /^'(\\'|[^'])*'/,
|
|
380
|
-
array_of: /^\[\*]/,
|
|
381
|
-
char: /^[[.\]]/,
|
|
382
|
-
number: /^\d+/,
|
|
383
|
-
word: /^\w+/,
|
|
384
|
-
});
|
|
385
|
-
}
|
|
386
|
-
getName() {
|
|
387
|
-
const nameStart = this.next();
|
|
388
|
-
if (nameStart.type === 'word') {
|
|
389
|
-
return nameStart.text;
|
|
390
|
-
}
|
|
391
|
-
if (nameStart.type === '[') {
|
|
392
|
-
const quotedName = this.next('quoted');
|
|
393
|
-
this.next(']');
|
|
394
|
-
return quotedName.text;
|
|
395
|
-
}
|
|
396
|
-
throw this.parseError('Expected column name');
|
|
397
|
-
}
|
|
398
|
-
pathChain() {
|
|
399
|
-
const chain = { name: this.getName() };
|
|
400
|
-
let node = chain;
|
|
401
|
-
for (;;) {
|
|
402
|
-
const sep = this.next();
|
|
403
|
-
if (sep.type === 'eof') {
|
|
404
|
-
return chain;
|
|
405
|
-
}
|
|
406
|
-
if (sep.type === '.') {
|
|
407
|
-
node.next = { name: this.next('word').text };
|
|
408
|
-
node = node.next;
|
|
409
|
-
}
|
|
410
|
-
else if (sep.type === 'array_of') {
|
|
411
|
-
node.next = { arrayRef: true };
|
|
412
|
-
node = node.next;
|
|
413
|
-
}
|
|
414
|
-
else if (sep.type === '[') {
|
|
415
|
-
// Actually a dot access through a quoted name
|
|
416
|
-
const quoted = this.next('quoted');
|
|
417
|
-
node.next = { name: quoted.text };
|
|
418
|
-
node = node.next;
|
|
419
|
-
this.next(']');
|
|
420
|
-
}
|
|
421
|
-
else {
|
|
422
|
-
throw this.parseError(`Unexpected ${sep.type}`);
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
exports.PathParser = PathParser;
|
|
428
346
|
//# sourceMappingURL=snowflake_connection.js.map
|