@kaelio/ktx 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/assets/python/{kaelio_ktx-0.8.0-py3-none-any.whl → kaelio_ktx-0.9.0-py3-none-any.whl} +0 -0
  2. package/assets/python/manifest.json +4 -4
  3. package/dist/.tsbuildinfo +1 -1
  4. package/dist/cli-runtime.js +50 -3
  5. package/dist/commands/setup-commands.js +1 -1
  6. package/dist/connection-recovery.d.ts +34 -0
  7. package/dist/connection-recovery.js +82 -0
  8. package/dist/connection.js +3 -1
  9. package/dist/context/ingest/adapters/historic-sql/bigquery-query-history-reader.js +71 -20
  10. package/dist/context/ingest/adapters/historic-sql/chunk-unified.js +2 -1
  11. package/dist/context/ingest/adapters/historic-sql/connection-dialect.d.ts +9 -0
  12. package/dist/context/ingest/adapters/historic-sql/connection-dialect.js +15 -4
  13. package/dist/context/ingest/adapters/historic-sql/pattern-inputs.js +8 -2
  14. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.d.ts +29 -0
  15. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.js +190 -0
  16. package/dist/context/ingest/adapters/historic-sql/scope-floor.d.ts +18 -0
  17. package/dist/context/ingest/adapters/historic-sql/scope-floor.js +229 -0
  18. package/dist/context/ingest/adapters/historic-sql/scope-membership.d.ts +8 -0
  19. package/dist/context/ingest/adapters/historic-sql/scope-membership.js +29 -0
  20. package/dist/context/ingest/adapters/historic-sql/snowflake-query-history-reader.js +68 -19
  21. package/dist/context/ingest/adapters/historic-sql/stage-unified.js +57 -50
  22. package/dist/context/ingest/adapters/historic-sql/types.d.ts +36 -3
  23. package/dist/context/ingest/adapters/historic-sql/types.js +14 -2
  24. package/dist/context/ingest/context-evidence/sqlite-context-evidence-store.d.ts +1 -1
  25. package/dist/context/ingest/isolated-diff/patch-integrator.js +75 -5
  26. package/dist/context/ingest/local-adapters.js +21 -4
  27. package/dist/context/ingest/local-bundle-runtime.js +3 -2
  28. package/dist/context/llm/codex-exec-events.d.ts +20 -0
  29. package/dist/context/llm/codex-exec-events.js +155 -0
  30. package/dist/context/llm/codex-isolation.d.ts +3 -0
  31. package/dist/context/llm/codex-isolation.js +5 -0
  32. package/dist/context/llm/codex-mcp-runtime-server.d.ts +24 -0
  33. package/dist/context/llm/codex-mcp-runtime-server.js +51 -0
  34. package/dist/context/llm/codex-models.d.ts +2 -0
  35. package/dist/context/llm/codex-models.js +17 -0
  36. package/dist/context/llm/codex-runtime-config.d.ts +16 -0
  37. package/dist/context/llm/codex-runtime-config.js +19 -0
  38. package/dist/context/llm/codex-runtime.d.ts +37 -0
  39. package/dist/context/llm/codex-runtime.js +304 -0
  40. package/dist/context/llm/codex-sdk-runner.d.ts +21 -0
  41. package/dist/context/llm/codex-sdk-runner.js +63 -0
  42. package/dist/context/llm/local-config.d.ts +2 -0
  43. package/dist/context/llm/local-config.js +12 -1
  44. package/dist/context/project/config.d.ts +2 -0
  45. package/dist/context/project/config.js +2 -2
  46. package/dist/context/sql-analysis/http-sql-analysis-port.js +32 -2
  47. package/dist/context/sql-analysis/ports.d.ts +12 -2
  48. package/dist/context/tools/context-candidate-mark.tool.d.ts +2 -2
  49. package/dist/context-build-view.js +4 -32
  50. package/dist/io/buffered-command-io.d.ts +11 -0
  51. package/dist/io/buffered-command-io.js +28 -0
  52. package/dist/llm/types.d.ts +1 -1
  53. package/dist/local-adapters.d.ts +10 -2
  54. package/dist/local-adapters.js +19 -3
  55. package/dist/next-steps.js +1 -2
  56. package/dist/progress-port-adapter.d.ts +6 -0
  57. package/dist/progress-port-adapter.js +18 -0
  58. package/dist/public-ingest.d.ts +20 -1
  59. package/dist/public-ingest.js +178 -27
  60. package/dist/scan.js +3 -1
  61. package/dist/setup-context.d.ts +2 -0
  62. package/dist/setup-context.js +133 -27
  63. package/dist/setup-databases.d.ts +17 -1
  64. package/dist/setup-databases.js +358 -249
  65. package/dist/setup-models.d.ts +10 -1
  66. package/dist/setup-models.js +90 -2
  67. package/dist/setup-ready-menu.d.ts +16 -2
  68. package/dist/setup-ready-menu.js +37 -5
  69. package/dist/setup-sources.js +108 -28
  70. package/dist/setup.js +22 -10
  71. package/dist/status-project.d.ts +11 -0
  72. package/dist/status-project.js +50 -1
  73. package/dist/telemetry/command-hook.d.ts +1 -0
  74. package/dist/telemetry/command-hook.js +3 -1
  75. package/dist/telemetry/events.d.ts +11 -6
  76. package/dist/telemetry/events.js +10 -2
  77. package/dist/telemetry/identity.d.ts +0 -1
  78. package/dist/telemetry/identity.js +6 -6
  79. package/dist/telemetry/index.d.ts +12 -0
  80. package/dist/telemetry/index.js +13 -2
  81. package/dist/telemetry/scrubber.d.ts +10 -0
  82. package/dist/telemetry/scrubber.js +20 -0
  83. package/package.json +5 -4
@@ -0,0 +1,229 @@
1
+ import { access, readdir, readFile } from 'node:fs/promises';
2
+ import { join, relative } from 'node:path';
3
+ import YAML from 'yaml';
4
+ import { getDriverRegistration } from '../../../connections/drivers.js';
5
+ import { parseDottedTableEntry } from '../../../scan/enabled-tables.js';
6
+ import { tableRefKey, tableRefSet } from '../../../scan/table-ref.js';
7
+ import { readLiveDatabaseTableFiles } from '../live-database/stage.js';
8
/**
 * Tells whether a value is a plain-object-like record.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} True for non-null objects that are not arrays.
 */
function isRecord(value) {
    return value !== null && typeof value === 'object' && !Array.isArray(value);
}
11
/**
 * Extracts the non-blank strings from a loosely-typed value.
 *
 * @param {unknown} value - Expected to be an array; anything else yields `[]`.
 * @returns {string[]} Trimmed, non-empty string items in their original order.
 */
function stringArray(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    // Single pass: trim once and drop non-strings and blank entries.
    return value.flatMap((item) => {
        if (typeof item !== 'string') {
            return [];
        }
        const trimmed = item.trim();
        return trimmed.length > 0 ? [trimmed] : [];
    });
}
18
/**
 * Normalizes a loosely-typed list of table entries into table refs.
 *
 * String entries are parsed with `parseDottedTableEntry`; record entries are
 * accepted when they carry a non-empty string `name`. Every other entry, and
 * every string the parser rejects, is dropped.
 *
 * @param {unknown} values - Candidate list; a non-array yields `[]`.
 * @returns {Array<object>} Table refs in input order. Record entries become
 *   `{ catalog, db, name }` with non-string `catalog`/`db` coerced to `null`.
 */
function tableRefsFromValues(values) {
    if (!Array.isArray(values)) {
        return [];
    }
    const refs = [];
    for (const value of values) {
        if (typeof value === 'string') {
            const parsed = parseDottedTableEntry(value);
            if (parsed) {
                refs.push(parsed);
            }
            continue;
        }
        if (isRecord(value) && typeof value.name === 'string' && value.name.length > 0) {
            refs.push({
                catalog: typeof value.catalog === 'string' ? value.catalog : null,
                db: typeof value.db === 'string' ? value.db : null,
                name: value.name,
            });
        }
    }
    return refs;
}
38
/**
 * Reads the schemas a connection declares under its driver's scope config key.
 *
 * @param {unknown} driver - Driver identifier passed to `getDriverRegistration`.
 * @param {object} connection - Connection config; the driver's `scopeConfigKey` is read from it.
 * @returns {string[]} Unique, sorted schema names; `[]` when the driver has no scope key.
 */
function declaredSchemas(driver, connection) {
    const scopeKey = getDriverRegistration(driver)?.scopeConfigKey;
    if (!scopeKey) {
        return [];
    }
    const unique = new Set(stringArray(connection[scopeKey]));
    return [...unique].sort();
}
44
/**
 * De-duplicates table refs by their `tableRefKey` and orders them by that key.
 *
 * When several refs share a key, the last one in `refs` is kept.
 *
 * @param {Iterable<object>} refs - Table refs, possibly containing duplicates.
 * @returns {Array<object>} One ref per key, sorted with `localeCompare` on the key.
 */
function uniqueSortedTableRefs(refs) {
    const refByKey = new Map();
    for (const ref of refs) {
        refByKey.set(tableRefKey(ref), ref);
    }
    return [...refByKey.keys()]
        .sort((left, right) => left.localeCompare(right))
        .map((key) => refByKey.get(key));
}
53
/**
 * Finds the newest live-database scan directory that contains a `connection.json`.
 *
 * Looks under `<projectDir>/raw-sources/<connectionId>/live-database`, walking the
 * sub-directories in descending name order and returning the first one whose
 * `connection.json` is accessible.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} connectionId - Connection whose scans are inspected.
 * @returns {Promise<string|null>} Path of the chosen scan directory, or `null` when the
 *   root does not exist or no sub-directory qualifies.
 * @throws Re-throws any `readdir` failure other than `ENOENT`.
 */
async function latestLiveDatabaseScanDir(projectDir, connectionId) {
    const root = join(projectDir, 'raw-sources', connectionId, 'live-database');
    let entries;
    try {
        entries = await readdir(root, { withFileTypes: true });
    }
    catch (error) {
        if (error?.code === 'ENOENT') {
            return null;
        }
        throw error;
    }
    const newestFirst = entries
        .filter((entry) => entry.isDirectory())
        .map((entry) => entry.name)
        .sort()
        .reverse();
    for (const dirName of newestFirst) {
        const candidate = join(root, dirName);
        try {
            await access(join(candidate, 'connection.json'));
            return candidate;
        }
        catch {
            // Best effort: a directory without a reachable connection.json is skipped
            // and the next (older-named) directory is tried.
        }
    }
    return null;
}
81
/**
 * Collects the table refs recorded by the latest live-database scan of a connection.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} connectionId - Connection whose scan is read.
 * @returns {Promise<{refs: Array<object>, catalogAvailable: boolean, warnings: string[]}>}
 *   `catalogAvailable` is false when no scan directory exists or the table files cannot
 *   be read; a read failure is reported as a warning instead of being thrown.
 */
async function scannedTableRefs(projectDir, connectionId) {
    const scanDir = await latestLiveDatabaseScanDir(projectDir, connectionId);
    if (!scanDir) {
        return { refs: [], catalogAvailable: false, warnings: [] };
    }
    try {
        const tableFiles = await readLiveDatabaseTableFiles(scanDir);
        const refs = tableFiles.map(({ table }) => ({
            catalog: table.catalog,
            db: table.db,
            name: table.name,
        }));
        return { refs: uniqueSortedTableRefs(refs), catalogAvailable: true, warnings: [] };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return {
            refs: [],
            catalogAvailable: false,
            warnings: [`query_history_scope_floor_catalog_read_failed:live_database_scan:${reason}`],
        };
    }
}
104
/**
 * Recursively lists the YAML files below a directory.
 *
 * @param {string} root - Directory to walk.
 * @returns {Promise<string[]>} Sorted paths relative to `root`, using `/` separators,
 *   for every file whose name ends in `.yml` or `.yaml` (case-insensitive). A missing
 *   `root` yields `[]`.
 * @throws Re-throws any `readdir` failure other than `ENOENT`.
 */
async function listYamlFiles(root) {
    let entries;
    try {
        entries = await readdir(root, { withFileTypes: true, recursive: true });
    }
    catch (error) {
        if (error?.code === 'ENOENT') {
            return [];
        }
        throw error;
    }
    return entries
        .filter((entry) => entry.isFile() && /\.ya?ml$/i.test(entry.name))
        .map((entry) => {
            // `parentPath` is missing on older Node releases that already support
            // `recursive`; those expose the same directory as `path`.
            const parentDir = entry.parentPath ?? entry.path;
            return relative(root, join(parentDir, entry.name)).replace(/\\/g, '/');
        })
        .sort();
}
118
/**
 * Extracts table refs from a schema manifest document.
 *
 * The manifest is expected to have a `tables` mapping whose values carry a dotted
 * `table` string; entries without one, or whose string cannot be parsed, are skipped.
 *
 * @param {string} content - YAML text of the manifest.
 * @returns {Array<object>} Parsed table refs; `[]` when the document has no `tables` mapping.
 * @throws Whatever `YAML.parse` throws for invalid input.
 */
function refsFromManifest(content) {
    const manifest = YAML.parse(content);
    if (!isRecord(manifest) || !isRecord(manifest.tables)) {
        return [];
    }
    const refs = [];
    for (const entry of Object.values(manifest.tables)) {
        if (!isRecord(entry) || typeof entry.table !== 'string') {
            continue;
        }
        const ref = parseDottedTableEntry(entry.table);
        if (ref) {
            refs.push(ref);
        }
    }
    return refs;
}
129
/**
 * Extracts the table ref from a standalone source document.
 *
 * @param {string} content - YAML text with an optional top-level dotted `table` string.
 * @returns {Array<object>} A single-element list with the parsed ref, or `[]` when the
 *   document has no `table` string or the string cannot be parsed.
 * @throws Whatever `YAML.parse` throws for invalid input.
 */
function refsFromStandaloneSource(content) {
    const source = YAML.parse(content);
    if (!isRecord(source) || typeof source.table !== 'string') {
        return [];
    }
    const ref = parseDottedTableEntry(source.table);
    return ref ? [ref] : [];
}
136
/**
 * Collects the table refs referenced by a connection's semantic-layer YAML files.
 *
 * Files under `_schema/` are read as manifests; every other YAML file is read as a
 * standalone source. A file that cannot be read or parsed adds a warning and is
 * otherwise ignored.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} connectionId - Connection whose `semantic-layer/<connectionId>` tree is read.
 * @returns {Promise<{refs: Array<object>, warnings: string[]}>} Unique, sorted refs plus
 *   one warning per failed file.
 */
async function semanticTableRefs(projectDir, connectionId) {
    const root = join(projectDir, 'semantic-layer', connectionId);
    const files = await listYamlFiles(root);
    const refs = [];
    const warnings = [];
    for (const file of files) {
        const extractRefs = file.startsWith('_schema/') ? refsFromManifest : refsFromStandaloneSource;
        try {
            const content = await readFile(join(root, file), 'utf-8');
            refs.push(...extractRefs(content));
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            warnings.push(`query_history_scope_floor_catalog_read_failed:${file}:${reason}`);
        }
    }
    return { refs: uniqueSortedTableRefs(refs), warnings };
}
152
/**
 * Resolves which tables/schemas query-history ingestion is limited to (the "scope floor").
 *
 * Precedence:
 *  1. Explicit enabled tables (stored query-history config, then `connection.enabled_tables`).
 *  2. Explicit enabled schemas from the stored config; `'*'` disables the floor.
 *  3. Schemas declared on the connection plus schemas of semantic-layer tables.
 *
 * In cases 2 and 3 the floor is disabled with a `catalog_unavailable` warning when
 * schemas are known but no scan catalog is available or no modeled table was found.
 *
 * @param {object} input
 * @param {object} input.storedQueryHistory - Stored config; `enabledTables` and `enabledSchemas` are read.
 * @param {object} input.connection - Connection config; `enabled_tables` and the driver scope key are read.
 * @param {string} input.projectDir - Project root directory.
 * @param {string} input.connectionId - Connection identifier.
 * @param {unknown} input.driver - Driver identifier used to find the scope config key.
 * @returns {Promise<{enabledTables: Array<object>, enabledTableKeys: unknown,
 *   enabledSchemas: string[], modeledTableCatalog: Array<object>, floorDisabled: boolean,
 *   warnings: string[]}>} The resolved scope; `enabledTableKeys` is the result of
 *   `tableRefSet` for explicit tables and `null` otherwise.
 */
export async function resolveQueryHistoryScopeFloor(input) {
    const explicitEnabledTables = [
        ...tableRefsFromValues(input.storedQueryHistory.enabledTables),
        ...tableRefsFromValues(input.connection.enabled_tables),
    ];
    // The two catalog sources only read from disk and do not depend on each other.
    const [semanticTables, scannedTables] = await Promise.all([
        semanticTableRefs(input.projectDir, input.connectionId),
        scannedTableRefs(input.projectDir, input.connectionId),
    ]);
    const modeledTableCatalog = uniqueSortedTableRefs([
        ...semanticTables.refs,
        ...scannedTables.refs,
        ...explicitEnabledTables,
    ]);
    const warnings = [...semanticTables.warnings, ...scannedTables.warnings];
    const catalogUnavailable = !scannedTables.catalogAvailable || modeledTableCatalog.length === 0;
    // Result with the floor switched off: every schema ('*') is allowed.
    const floorDisabledResult = (resultWarnings) => ({
        enabledTables: [],
        enabledTableKeys: null,
        enabledSchemas: ['*'],
        modeledTableCatalog,
        floorDisabled: true,
        warnings: resultWarnings,
    });
    // Result limited to the given schemas.
    const schemaScopedResult = (enabledSchemas) => ({
        enabledTables: [],
        enabledTableKeys: null,
        enabledSchemas,
        modeledTableCatalog,
        floorDisabled: false,
        warnings,
    });
    const catalogUnavailableResult = () => floorDisabledResult([...warnings, 'query_history_scope_floor_disabled:catalog_unavailable']);
    if (explicitEnabledTables.length > 0) {
        return {
            enabledTables: explicitEnabledTables,
            enabledTableKeys: tableRefSet(explicitEnabledTables),
            enabledSchemas: [],
            modeledTableCatalog,
            floorDisabled: false,
            warnings,
        };
    }
    const explicitSchemas = stringArray(input.storedQueryHistory.enabledSchemas);
    if (explicitSchemas.includes('*')) {
        return floorDisabledResult(warnings);
    }
    if (explicitSchemas.length > 0) {
        return catalogUnavailable
            ? catalogUnavailableResult()
            : schemaScopedResult([...new Set(explicitSchemas)].sort());
    }
    const schemas = new Set(declaredSchemas(input.driver, input.connection));
    for (const ref of semanticTables.refs) {
        if (ref.db) {
            schemas.add(ref.db);
        }
    }
    if (schemas.size > 0 && catalogUnavailable) {
        return catalogUnavailableResult();
    }
    return schemaScopedResult([...schemas].sort());
}
@@ -0,0 +1,8 @@
1
+ import type { KtxTableRef } from '../../../scan/types.js';
2
/** Scope settings that decide which touched tables count as query history. */
export interface QueryHistoryScopeMembershipConfig {
    /** Explicitly enabled tables; when non-empty only these tables are included. */
    enabledTables: readonly KtxTableRef[];
    /** Enabled schema names; an entry of `'*'` disables the scope floor. */
    enabledSchemas: readonly string[];
}
/** Returns true when `enabledSchemas` contains `'*'`. */
export declare function isQueryHistoryScopeFloorDisabled(config: QueryHistoryScopeMembershipConfig): boolean;
/** Returns true when neither tables nor schemas are configured, so no scope can be enforced. */
export declare function shouldFailOpenQueryHistoryScope(config: QueryHistoryScopeMembershipConfig): boolean;
/**
 * Filters `tablesTouched` down to the refs allowed by `config`: by table key when tables
 * are enabled, unfiltered when the floor is disabled or fails open, otherwise by schema.
 */
export declare function includedQueryHistoryTableRefs(tablesTouched: readonly KtxTableRef[], config: QueryHistoryScopeMembershipConfig): KtxTableRef[];
@@ -0,0 +1,29 @@
1
+ import { tableRefKey, tableRefSet } from '../../../scan/table-ref.js';
2
/**
 * Returns the schema (`db`) of a table ref.
 *
 * @param {{db?: string|null}} ref - Table ref.
 * @returns {string|null} The `db` value, or `null` when it is missing or empty.
 */
function schemaNameForRef(ref) {
    const { db } = ref;
    return db && db.length > 0 ? db : null;
}
5
/**
 * Builds the set of concrete schema names from a config list.
 *
 * @param {readonly string[]} enabledSchemas - Configured schema names.
 * @returns {Set<string>} The names with the `'*'` wildcard entry removed.
 */
function schemaNamesFromConfig(enabledSchemas) {
    const names = new Set(enabledSchemas);
    names.delete('*');
    return names;
}
8
/**
 * Tells whether the scope floor is switched off.
 *
 * @param {{enabledSchemas: readonly string[]}} config - Scope membership config.
 * @returns {boolean} True when `enabledSchemas` contains the `'*'` wildcard.
 */
export function isQueryHistoryScopeFloorDisabled(config) {
    const { enabledSchemas } = config;
    return enabledSchemas.includes('*');
}
11
/**
 * Tells whether scope filtering must fail open because nothing is configured.
 *
 * @param {{enabledTables: readonly object[], enabledSchemas: readonly string[]}} config -
 *   Scope membership config.
 * @returns {boolean} True when both `enabledTables` and `enabledSchemas` are empty.
 */
export function shouldFailOpenQueryHistoryScope(config) {
    // An empty schema list cannot contain the '*' wildcard, so no separate
    // "floor disabled" check is needed here.
    return config.enabledTables.length === 0 && config.enabledSchemas.length === 0;
}
16
/**
 * Selects the touched tables that fall inside the configured query-history scope.
 *
 * - With enabled tables: keeps refs whose `tableRefKey` is in the enabled set.
 * - With the floor disabled (`'*'`) or nothing configured: keeps every ref.
 * - Otherwise: keeps refs whose non-empty `db` is one of the enabled schemas.
 *
 * @param {readonly object[]} tablesTouched - Table refs touched by a query.
 * @param {{enabledTables: readonly object[], enabledSchemas: readonly string[]}} config -
 *   Scope membership config.
 * @returns {object[]} A new array with the included refs, in input order.
 */
export function includedQueryHistoryTableRefs(tablesTouched, config) {
    if (config.enabledTables.length > 0) {
        const enabledKeys = tableRefSet(config.enabledTables);
        return tablesTouched.filter((ref) => enabledKeys.has(tableRefKey(ref)));
    }
    const unrestricted = isQueryHistoryScopeFloorDisabled(config) || shouldFailOpenQueryHistoryScope(config);
    if (unrestricted) {
        return [...tablesTouched];
    }
    const allowedSchemas = schemaNamesFromConfig(config.enabledSchemas);
    return tablesTouched.filter((ref) => {
        const schema = schemaNameForRef(ref);
        return schema !== null && allowedSchemas.has(schema);
    });
}
@@ -148,26 +148,75 @@ export class SnowflakeHistoricSqlQueryHistoryReader {
148
148
  }
149
149
  async *fetchAggregated(client, window, config) {
150
150
  const sql = `
151
+ WITH filtered_queries AS (
152
+ SELECT
153
+ query_hash,
154
+ query_text,
155
+ user_name,
156
+ start_time,
157
+ total_elapsed_time,
158
+ execution_status,
159
+ rows_produced
160
+ FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
161
+ WHERE query_text IS NOT NULL
162
+ AND query_type IN ('SELECT', 'MERGE')
163
+ AND start_time >= ${timestampLiteral(window.start)}
164
+ AND start_time < ${timestampLiteral(window.end)}
165
+ ),
166
+ template_stats AS (
167
+ SELECT
168
+ query_hash AS template_id,
169
+ MIN(query_text) AS canonical_sql,
170
+ COUNT(*) AS executions,
171
+ COUNT(DISTINCT user_name) AS distinct_users,
172
+ MIN(start_time) AS first_seen,
173
+ MAX(start_time) AS last_seen,
174
+ APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
175
+ APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
176
+ DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
177
+ SUM(rows_produced) AS rows_produced
178
+ FROM filtered_queries
179
+ GROUP BY query_hash
180
+ HAVING COUNT(*) >= ${config.minExecutions}
181
+ ),
182
+ template_users AS (
183
+ SELECT
184
+ query_hash AS template_id,
185
+ user_name AS user,
186
+ COUNT(*) AS executions,
187
+ MAX(start_time) AS last_seen
188
+ FROM filtered_queries
189
+ GROUP BY query_hash, user_name
190
+ )
151
191
  SELECT
152
- query_hash AS template_id,
153
- MIN(query_text) AS canonical_sql,
154
- COUNT(*) AS executions,
155
- COUNT(DISTINCT user_name) AS distinct_users,
156
- MIN(start_time) AS first_seen,
157
- MAX(start_time) AS last_seen,
158
- APPROX_PERCENTILE(total_elapsed_time, 0.50) AS p50_ms,
159
- APPROX_PERCENTILE(total_elapsed_time, 0.95) AS p95_ms,
160
- DIV0(COUNT_IF(execution_status != 'SUCCESS'), COUNT(*)) AS error_rate,
161
- SUM(rows_produced) AS rows_produced,
162
- ARRAY_AGG(OBJECT_CONSTRUCT('user', user_name, 'executions', 1)) WITHIN GROUP (ORDER BY start_time DESC)::string AS top_users
163
- FROM SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY
164
- WHERE query_text IS NOT NULL
165
- AND query_type IN ('SELECT', 'MERGE')
166
- AND start_time >= ${timestampLiteral(window.start)}
167
- AND start_time < ${timestampLiteral(window.end)}
168
- GROUP BY query_hash
169
- HAVING COUNT(*) >= ${config.minExecutions}
170
- ORDER BY executions DESC`.trim();
192
+ stats.template_id,
193
+ stats.canonical_sql,
194
+ stats.executions,
195
+ stats.distinct_users,
196
+ stats.first_seen,
197
+ stats.last_seen,
198
+ stats.p50_ms,
199
+ stats.p95_ms,
200
+ stats.error_rate,
201
+ stats.rows_produced,
202
+ ARRAY_AGG(
203
+ OBJECT_CONSTRUCT('user', users.user, 'executions', users.executions)
204
+ ) WITHIN GROUP (ORDER BY users.executions DESC, users.last_seen DESC)::string AS top_users
205
+ FROM template_stats AS stats
206
+ JOIN template_users AS users
207
+ ON users.template_id = stats.template_id
208
+ GROUP BY
209
+ stats.template_id,
210
+ stats.canonical_sql,
211
+ stats.executions,
212
+ stats.distinct_users,
213
+ stats.first_seen,
214
+ stats.last_seen,
215
+ stats.p50_ms,
216
+ stats.p95_ms,
217
+ stats.error_rate,
218
+ stats.rows_produced
219
+ ORDER BY stats.executions DESC`.trim();
171
220
  const result = await queryClient(client).executeQuery(sql);
172
221
  if (result.error) {
173
222
  throw grantsError(result.error);
@@ -1,8 +1,10 @@
1
1
  import { mkdir, writeFile } from 'node:fs/promises';
2
2
  import { dirname, join } from 'node:path';
3
+ import { tableRefKey } from '../../../scan/table-ref.js';
3
4
  import { bucketDistinctUsers, bucketErrorRate, bucketExecutions, bucketFrequency, bucketP95Runtime, bucketRecency, } from './buckets.js';
4
5
  import { splitHistoricSqlPatternInputs } from './pattern-inputs.js';
5
6
  import { compileHistoricSqlRedactionPatterns, redactHistoricSqlText, } from './redaction.js';
7
+ import { includedQueryHistoryTableRefs, isQueryHistoryScopeFloorDisabled, shouldFailOpenQueryHistoryScope, } from './scope-membership.js';
6
8
  import { HISTORIC_SQL_SOURCE_KEY, aggregatedTemplateSchema, historicSqlUnifiedPullConfigSchema, } from './types.js';
7
9
  const TRIVIAL_SQL_RE = /^\s*SELECT\s+(1|NOW\(\)|CURRENT_TIMESTAMP|VERSION\(\))\s*;?\s*$/i;
8
10
  const NOISE_PREFIX_RE = /^\s*(SHOW|DESCRIBE|DESC|EXPLAIN|USE|SET)\b/i;
@@ -17,9 +19,22 @@ function compilePatterns(patterns) {
17
19
  function matchesAny(value, patterns) {
18
20
  return !!value && patterns.some((pattern) => pattern.test(value));
19
21
  }
22
/**
 * Detects SQL emitted by ktx's own warehouse scan.
 *
 * The scan issues relationship- and column-profiling probes that land in
 * pg_stat_statements (relationship-validation, relationship-composite-candidates, and
 * each dialect's relationship value aggregation). They are ktx introspection, not
 * genuine query usage, so they must not be mined back as query history. The markers
 * are ktx-owned identifiers, stable across dialects.
 *
 * @param {string} sql - SQL text of a query template.
 * @returns {boolean} True when the text mentions `relationship_profile_values`, or both
 *   `child_values` and `parent_values`, as whole words (case-insensitive).
 */
function isKtxScanProbe(sql) {
    const mentionsProfileValues = /\brelationship_profile_values\b/i.test(sql);
    if (mentionsProfileValues) {
        return true;
    }
    const mentionsChildValues = /\bchild_values\b/i.test(sql);
    return mentionsChildValues && /\bparent_values\b/i.test(sql);
}
20
33
  function shouldDropBySql(sql, config) {
21
34
  if (NOISE_PREFIX_RE.test(sql) || SYSTEM_TABLE_RE.test(sql))
22
35
  return true;
36
+ if (isKtxScanProbe(sql))
37
+ return true;
23
38
  if (config.filters.dropTrivialProbes !== false && TRIVIAL_SQL_RE.test(sql))
24
39
  return true;
25
40
  return false;
@@ -32,8 +47,7 @@ function shouldDropByUsers(template, config) {
32
47
  const matchingExecutions = template.topUsers
33
48
  .filter((entry) => matchesAny(entry.user, patterns))
34
49
  .reduce((sum, entry) => sum + entry.executions, 0);
35
- const allExecutions = template.topUsers.reduce((sum, entry) => sum + entry.executions, 0);
36
- const serviceOnly = allExecutions > 0 && matchingExecutions >= allExecutions;
50
+ const serviceOnly = template.stats.executions > 0 && matchingExecutions >= template.stats.executions;
37
51
  return service.mode === 'exclude' ? serviceOnly : !serviceOnly;
38
52
  }
39
53
  function shouldDropByFailure(template, config) {
@@ -49,38 +63,8 @@ function shouldDropTemplate(template, config) {
49
63
  return true;
50
64
  return false;
51
65
  }
52
- function normalizeTableIdentifier(value) {
53
- return value.trim().toLowerCase();
54
- }
55
- function unqualifiedTableIdentifier(value) {
56
- const parts = normalizeTableIdentifier(value).split('.').filter(Boolean);
57
- return parts.at(-1) ?? '';
58
- }
59
- function buildEnabledTableFilter(enabledTables) {
60
- if (enabledTables.length === 0) {
61
- return null;
62
- }
63
- const exact = new Set(enabledTables.map(normalizeTableIdentifier).filter((value) => value.length > 0));
64
- const unqualifiedCounts = new Map();
65
- for (const table of exact) {
66
- const unqualified = unqualifiedTableIdentifier(table);
67
- if (unqualified.length > 0) {
68
- unqualifiedCounts.set(unqualified, (unqualifiedCounts.get(unqualified) ?? 0) + 1);
69
- }
70
- }
71
- return {
72
- exact,
73
- uniqueUnqualified: new Set([...unqualifiedCounts.entries()]
74
- .filter(([, count]) => count === 1)
75
- .map(([table]) => table)),
76
- };
77
- }
78
- function isEnabledTable(table, filter) {
79
- if (!filter) {
80
- return true;
81
- }
82
- const normalized = normalizeTableIdentifier(table);
83
- return filter.exact.has(normalized) || filter.uniqueUnqualified.has(unqualifiedTableIdentifier(normalized));
66
/**
 * Renders a table ref as a dotted display name.
 *
 * @param {{catalog: string|null, db: string|null, name: string}} ref - Table ref.
 * @returns {string} `catalog.db.name` with missing or empty parts left out.
 */
function displayTableRef(ref) {
    const parts = [ref.catalog, ref.db, ref.name];
    const present = parts.filter((part) => Boolean(part) && part.length > 0);
    return present.join('.');
}
85
69
  function historicSqlWindowDays(config) {
86
70
  return 'windowDays' in config ? config.windowDays : 90;
@@ -107,9 +91,10 @@ function recordJoin(acc, otherTable, columns, executions) {
107
91
  acc.observedJoins.set(otherTable, byColumns);
108
92
  }
109
93
  }
110
- function accumulatorFor(table) {
94
+ function accumulatorFor(tableRef) {
111
95
  return {
112
- table,
96
+ tableRef,
97
+ table: displayTableRef(tableRef),
113
98
  executions: 0,
114
99
  distinctUsers: 0,
115
100
  errorRateNumerator: 0,
@@ -138,8 +123,8 @@ function addTemplate(acc, parsed) {
138
123
  }
139
124
  }
140
125
  const joinColumns = parsed.columnsByClause.join ?? [];
141
- for (const otherTable of parsed.tablesTouched.filter((table) => table !== acc.table)) {
142
- recordJoin(acc, otherTable, joinColumns, executions);
126
+ for (const otherTable of parsed.tablesTouched.filter((table) => tableRefKey(table) !== tableRefKey(acc.tableRef))) {
127
+ recordJoin(acc, displayTableRef(otherTable), joinColumns, executions);
143
128
  }
144
129
  acc.topTemplates.push(parsed.template);
145
130
  }
@@ -170,6 +155,7 @@ function toStagedTable(acc, now) {
170
155
  }));
171
156
  return {
172
157
  table: acc.table,
158
+ tableRef: acc.tableRef,
173
159
  stats: {
174
160
  executionsBucket: bucketExecutions(acc.executions),
175
161
  distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
@@ -188,7 +174,7 @@ function toPatternsInput(parsedTemplates) {
188
174
  .map(({ template, tablesTouched }) => ({
189
175
  id: template.templateId,
190
176
  canonicalSql: template.canonicalSql,
191
- tablesTouched: [...tablesTouched].sort(),
177
+ tablesTouched: [...tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
192
178
  executionsBucket: bucketExecutions(template.stats.executions),
193
179
  distinctUsersBucket: bucketDistinctUsers(template.stats.distinctUsers),
194
180
  dialect: template.dialect,
@@ -198,7 +184,6 @@ function toPatternsInput(parsedTemplates) {
198
184
  }
199
185
  export async function stageHistoricSqlAggregatedSnapshot(input) {
200
186
  const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
201
- const enabledTableFilter = buildEnabledTableFilter(config.enabledTables);
202
187
  const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
203
188
  const now = input.now ?? new Date();
204
189
  const windowStart = new Date(now.getTime() - historicSqlWindowDays(config) * 24 * 60 * 60 * 1000);
@@ -212,8 +197,25 @@ export async function stageHistoricSqlAggregatedSnapshot(input) {
212
197
  snapshot.push(parsed);
213
198
  }
214
199
  }
215
- const analysis = await input.sqlAnalysis.analyzeBatch(snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql })), config.dialect);
216
- const warnings = [];
200
+ const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
201
+ const analysisOptions = config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
202
+ const warnings = [
203
+ ...config.scopeFloorWarnings,
204
+ ...(shouldFailOpenQueryHistoryScope(config) ? ['query_history_scope_floor_disabled:empty_modeled_scope'] : []),
205
+ ];
206
+ let scopeDisabledByQualificationFailure = false;
207
+ let analysis;
208
+ try {
209
+ analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, analysisOptions);
210
+ }
211
+ catch (error) {
212
+ if (!analysisOptions || config.enabledTables.length > 0 || isQueryHistoryScopeFloorDisabled(config)) {
213
+ throw error;
214
+ }
215
+ warnings.push('query_history_scope_floor_disabled:catalog_qualification_failed');
216
+ scopeDisabledByQualificationFailure = true;
217
+ analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, config.dialect, undefined);
218
+ }
217
219
  const parsedTemplates = [];
218
220
  for (const template of snapshot) {
219
221
  const parsed = analysis.get(template.templateId);
@@ -221,8 +223,12 @@ export async function stageHistoricSqlAggregatedSnapshot(input) {
221
223
  warnings.push(`parse_failed:${template.templateId}`);
222
224
  continue;
223
225
  }
224
- const tablesTouched = [...new Set(parsed.tablesTouched)].filter((table) => table.length > 0).sort();
225
- const includedTables = tablesTouched.filter((table) => isEnabledTable(table, enabledTableFilter));
226
+ const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
227
+ .filter((ref) => ref.name.length > 0)
228
+ .sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
229
+ const includedTables = scopeDisabledByQualificationFailure
230
+ ? [...tablesTouched]
231
+ : includedQueryHistoryTableRefs(tablesTouched, config);
226
232
  if (includedTables.length === 0) {
227
233
  continue;
228
234
  }
@@ -235,19 +241,20 @@ export async function stageHistoricSqlAggregatedSnapshot(input) {
235
241
  }
236
242
  const byTable = new Map();
237
243
  for (const parsed of parsedTemplates) {
238
- for (const table of parsed.includedTables) {
239
- const acc = byTable.get(table) ?? accumulatorFor(table);
244
+ for (const tableRef of parsed.includedTables) {
245
+ const key = tableRefKey(tableRef);
246
+ const acc = byTable.get(key) ?? accumulatorFor(tableRef);
240
247
  addTemplate(acc, parsed);
241
- byTable.set(table, acc);
248
+ byTable.set(key, acc);
242
249
  }
243
250
  }
244
251
  await mkdir(input.stagedDir, { recursive: true });
245
- for (const [table, acc] of [...byTable.entries()].sort(([left], [right]) => left.localeCompare(right))) {
246
- await writeJson(input.stagedDir, `tables/${table}.json`, toStagedTable(acc, now));
252
+ for (const [, acc] of [...byTable.entries()].sort((left, right) => left[0].localeCompare(right[0]))) {
253
+ await writeJson(input.stagedDir, `tables/${acc.table}.json`, toStagedTable(acc, now));
247
254
  }
248
255
  const patternsInput = toPatternsInput(parsedTemplates);
249
256
  const patternInputSplit = splitHistoricSqlPatternInputs(patternsInput);
250
- const allWarnings = [...warnings, ...patternInputSplit.warnings];
257
+ const allWarnings = [...new Set([...warnings, ...patternInputSplit.warnings])];
251
258
  await writeJson(input.stagedDir, 'patterns-input.json', patternInputSplit.auditInput);
252
259
  for (const shard of patternInputSplit.shards) {
253
260
  await writeJson(input.stagedDir, shard.path, shard.input);
@@ -9,7 +9,19 @@ declare const historicSqlDialectSchema: z.ZodEnum<{
9
9
  export type HistoricSqlDialect = z.infer<typeof historicSqlDialectSchema>;
10
10
  export declare const historicSqlUnifiedPullConfigSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
11
11
  minExecutions: z.ZodDefault<z.ZodNumber>;
12
- enabledTables: z.ZodDefault<z.ZodArray<z.ZodString>>;
12
+ enabledTables: z.ZodDefault<z.ZodArray<z.ZodObject<{
13
+ catalog: z.ZodNullable<z.ZodString>;
14
+ db: z.ZodNullable<z.ZodString>;
15
+ name: z.ZodString;
16
+ }, z.core.$strict>>>;
17
+ enabledSchemas: z.ZodDefault<z.ZodArray<z.ZodString>>;
18
+ modeledTableCatalog: z.ZodDefault<z.ZodArray<z.ZodObject<{
19
+ catalog: z.ZodNullable<z.ZodString>;
20
+ db: z.ZodNullable<z.ZodString>;
21
+ name: z.ZodString;
22
+ columns: z.ZodOptional<z.ZodArray<z.ZodString>>;
23
+ }, z.core.$strict>>>;
24
+ scopeFloorWarnings: z.ZodDefault<z.ZodArray<z.ZodString>>;
13
25
  filters: z.ZodDefault<z.ZodObject<{
14
26
  serviceAccounts: z.ZodOptional<z.ZodObject<{
15
27
  patterns: z.ZodDefault<z.ZodArray<z.ZodString>>;
@@ -41,7 +53,19 @@ export declare const historicSqlUnifiedPullConfigSchema: z.ZodDiscriminatedUnion
41
53
  windowDays: z.ZodDefault<z.ZodNumber>;
42
54
  }, z.core.$strip>, z.ZodObject<{
43
55
  minExecutions: z.ZodDefault<z.ZodNumber>;
44
- enabledTables: z.ZodDefault<z.ZodArray<z.ZodString>>;
56
+ enabledTables: z.ZodDefault<z.ZodArray<z.ZodObject<{
57
+ catalog: z.ZodNullable<z.ZodString>;
58
+ db: z.ZodNullable<z.ZodString>;
59
+ name: z.ZodString;
60
+ }, z.core.$strict>>>;
61
+ enabledSchemas: z.ZodDefault<z.ZodArray<z.ZodString>>;
62
+ modeledTableCatalog: z.ZodDefault<z.ZodArray<z.ZodObject<{
63
+ catalog: z.ZodNullable<z.ZodString>;
64
+ db: z.ZodNullable<z.ZodString>;
65
+ name: z.ZodString;
66
+ columns: z.ZodOptional<z.ZodArray<z.ZodString>>;
67
+ }, z.core.$strict>>>;
68
+ scopeFloorWarnings: z.ZodDefault<z.ZodArray<z.ZodString>>;
45
69
  filters: z.ZodDefault<z.ZodObject<{
46
70
  serviceAccounts: z.ZodOptional<z.ZodObject<{
47
71
  patterns: z.ZodDefault<z.ZodArray<z.ZodString>>;
@@ -95,6 +119,11 @@ export declare const aggregatedTemplateSchema: z.ZodObject<{
95
119
  export type AggregatedTemplate = z.infer<typeof aggregatedTemplateSchema>;
96
120
  export declare const stagedTableInputSchema: z.ZodObject<{
97
121
  table: z.ZodString;
122
+ tableRef: z.ZodObject<{
123
+ catalog: z.ZodNullable<z.ZodString>;
124
+ db: z.ZodNullable<z.ZodString>;
125
+ name: z.ZodString;
126
+ }, z.core.$strict>;
98
127
  stats: z.ZodObject<{
99
128
  executionsBucket: z.ZodString;
100
129
  distinctUsersBucket: z.ZodString;
@@ -121,7 +150,11 @@ export declare const stagedPatternsInputSchema: z.ZodObject<{
121
150
  templates: z.ZodArray<z.ZodObject<{
122
151
  id: z.ZodString;
123
152
  canonicalSql: z.ZodString;
124
- tablesTouched: z.ZodArray<z.ZodString>;
153
+ tablesTouched: z.ZodArray<z.ZodObject<{
154
+ catalog: z.ZodNullable<z.ZodString>;
155
+ db: z.ZodNullable<z.ZodString>;
156
+ name: z.ZodString;
157
+ }, z.core.$strict>>;
125
158
  executionsBucket: z.ZodString;
126
159
  distinctUsersBucket: z.ZodString;
127
160
  dialect: z.ZodEnum<{
@@ -2,9 +2,20 @@ import { z } from 'zod';
2
2
  export const HISTORIC_SQL_SOURCE_KEY = 'historic-sql';
3
3
  const historicSqlDialectSchema = z.enum(['snowflake', 'bigquery', 'postgres']);
4
4
  const filterModeSchema = z.enum(['exclude', 'include', 'mark-only']);
5
+ const ktxTableRefSchema = z.object({
6
+ catalog: z.string().nullable(),
7
+ db: z.string().nullable(),
8
+ name: z.string().min(1),
9
+ }).strict();
10
+ const ktxTableRefWithColumnsSchema = ktxTableRefSchema.extend({
11
+ columns: z.array(z.string().min(1)).optional(),
12
+ }).strict();
5
13
  const historicSqlCommonPullConfigSchema = z.object({
6
14
  minExecutions: z.number().int().nonnegative().default(5),
7
- enabledTables: z.array(z.string().min(1)).default([]),
15
+ enabledTables: z.array(ktxTableRefSchema).default([]),
16
+ enabledSchemas: z.array(z.string().min(1)).default([]),
17
+ modeledTableCatalog: z.array(ktxTableRefWithColumnsSchema).default([]),
18
+ scopeFloorWarnings: z.array(z.string()).default([]),
8
19
  filters: z.object({
9
20
  serviceAccounts: z.object({
10
21
  patterns: z.array(z.string()).default([]),
@@ -54,6 +65,7 @@ export const aggregatedTemplateSchema = z.object({
54
65
  });
55
66
  export const stagedTableInputSchema = z.object({
56
67
  table: z.string().min(1),
68
+ tableRef: ktxTableRefSchema,
57
69
  stats: z.object({
58
70
  executionsBucket: z.string(),
59
71
  distinctUsersBucket: z.string(),
@@ -77,7 +89,7 @@ export const stagedPatternsInputSchema = z.object({
77
89
  templates: z.array(z.object({
78
90
  id: z.string(),
79
91
  canonicalSql: z.string(),
80
- tablesTouched: z.array(z.string()),
92
+ tablesTouched: z.array(ktxTableRefSchema),
81
93
  executionsBucket: z.string(),
82
94
  distinctUsersBucket: z.string(),
83
95
  dialect: historicSqlDialectSchema,