@kaelio/ktx 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/assets/python/{kaelio_ktx-0.8.0-py3-none-any.whl → kaelio_ktx-0.9.0-py3-none-any.whl} +0 -0
  2. package/assets/python/manifest.json +4 -4
  3. package/dist/.tsbuildinfo +1 -1
  4. package/dist/cli-runtime.js +50 -3
  5. package/dist/commands/setup-commands.js +1 -1
  6. package/dist/connection-recovery.d.ts +34 -0
  7. package/dist/connection-recovery.js +82 -0
  8. package/dist/connection.js +3 -1
  9. package/dist/context/ingest/adapters/historic-sql/bigquery-query-history-reader.js +71 -20
  10. package/dist/context/ingest/adapters/historic-sql/chunk-unified.js +2 -1
  11. package/dist/context/ingest/adapters/historic-sql/connection-dialect.d.ts +9 -0
  12. package/dist/context/ingest/adapters/historic-sql/connection-dialect.js +15 -4
  13. package/dist/context/ingest/adapters/historic-sql/pattern-inputs.js +8 -2
  14. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.d.ts +29 -0
  15. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.js +190 -0
  16. package/dist/context/ingest/adapters/historic-sql/scope-floor.d.ts +18 -0
  17. package/dist/context/ingest/adapters/historic-sql/scope-floor.js +229 -0
  18. package/dist/context/ingest/adapters/historic-sql/scope-membership.d.ts +8 -0
  19. package/dist/context/ingest/adapters/historic-sql/scope-membership.js +29 -0
  20. package/dist/context/ingest/adapters/historic-sql/snowflake-query-history-reader.js +68 -19
  21. package/dist/context/ingest/adapters/historic-sql/stage-unified.js +57 -50
  22. package/dist/context/ingest/adapters/historic-sql/types.d.ts +36 -3
  23. package/dist/context/ingest/adapters/historic-sql/types.js +14 -2
  24. package/dist/context/ingest/context-evidence/sqlite-context-evidence-store.d.ts +1 -1
  25. package/dist/context/ingest/isolated-diff/patch-integrator.js +75 -5
  26. package/dist/context/ingest/local-adapters.js +21 -4
  27. package/dist/context/ingest/local-bundle-runtime.js +3 -2
  28. package/dist/context/llm/codex-exec-events.d.ts +20 -0
  29. package/dist/context/llm/codex-exec-events.js +155 -0
  30. package/dist/context/llm/codex-isolation.d.ts +3 -0
  31. package/dist/context/llm/codex-isolation.js +5 -0
  32. package/dist/context/llm/codex-mcp-runtime-server.d.ts +24 -0
  33. package/dist/context/llm/codex-mcp-runtime-server.js +51 -0
  34. package/dist/context/llm/codex-models.d.ts +2 -0
  35. package/dist/context/llm/codex-models.js +17 -0
  36. package/dist/context/llm/codex-runtime-config.d.ts +16 -0
  37. package/dist/context/llm/codex-runtime-config.js +19 -0
  38. package/dist/context/llm/codex-runtime.d.ts +37 -0
  39. package/dist/context/llm/codex-runtime.js +304 -0
  40. package/dist/context/llm/codex-sdk-runner.d.ts +21 -0
  41. package/dist/context/llm/codex-sdk-runner.js +63 -0
  42. package/dist/context/llm/local-config.d.ts +2 -0
  43. package/dist/context/llm/local-config.js +12 -1
  44. package/dist/context/project/config.d.ts +2 -0
  45. package/dist/context/project/config.js +2 -2
  46. package/dist/context/sql-analysis/http-sql-analysis-port.js +32 -2
  47. package/dist/context/sql-analysis/ports.d.ts +12 -2
  48. package/dist/context/tools/context-candidate-mark.tool.d.ts +2 -2
  49. package/dist/context-build-view.js +4 -32
  50. package/dist/io/buffered-command-io.d.ts +11 -0
  51. package/dist/io/buffered-command-io.js +28 -0
  52. package/dist/llm/types.d.ts +1 -1
  53. package/dist/local-adapters.d.ts +10 -2
  54. package/dist/local-adapters.js +19 -3
  55. package/dist/next-steps.js +1 -2
  56. package/dist/progress-port-adapter.d.ts +6 -0
  57. package/dist/progress-port-adapter.js +18 -0
  58. package/dist/public-ingest.d.ts +20 -1
  59. package/dist/public-ingest.js +178 -27
  60. package/dist/scan.js +3 -1
  61. package/dist/setup-context.d.ts +2 -0
  62. package/dist/setup-context.js +133 -27
  63. package/dist/setup-databases.d.ts +17 -1
  64. package/dist/setup-databases.js +358 -249
  65. package/dist/setup-models.d.ts +10 -1
  66. package/dist/setup-models.js +90 -2
  67. package/dist/setup-ready-menu.d.ts +16 -2
  68. package/dist/setup-ready-menu.js +37 -5
  69. package/dist/setup-sources.js +108 -28
  70. package/dist/setup.js +22 -10
  71. package/dist/status-project.d.ts +11 -0
  72. package/dist/status-project.js +50 -1
  73. package/dist/telemetry/command-hook.d.ts +1 -0
  74. package/dist/telemetry/command-hook.js +3 -1
  75. package/dist/telemetry/events.d.ts +11 -6
  76. package/dist/telemetry/events.js +10 -2
  77. package/dist/telemetry/identity.d.ts +0 -1
  78. package/dist/telemetry/identity.js +6 -6
  79. package/dist/telemetry/index.d.ts +12 -0
  80. package/dist/telemetry/index.js +13 -2
  81. package/dist/telemetry/scrubber.d.ts +10 -0
  82. package/dist/telemetry/scrubber.js +20 -0
  83. package/package.json +5 -4
@@ -35,11 +35,58 @@ async function runInit(args, io) {
35
35
  export async function runInitForCommander(args, io) {
36
36
  return await runInit(args, io);
37
37
  }
38
+ function signalExitCode(signal) {
39
+ // 128 + signal number: SIGINT (2) -> 130, SIGTERM (15) -> 143.
40
+ return signal === 'SIGTERM' ? 143 : 130;
41
+ }
42
+ /**
43
+ * Flush telemetry on interrupt for the real CLI process. `capture()` is
44
+ * fire-and-forget and the only flush guarantee lives in a `finally` a signal
45
+ * skips, so Ctrl-C / `kill` of a long-running command (ingest, `mcp stdio`)
46
+ * would otherwise drop its `command` event and queued events. Installed only
47
+ * when driving the actual process; programmatic/test callers pass their own
48
+ * `io` and never reach here. Returns a disposer that removes the listeners.
49
+ */
50
+ function installTelemetrySignalFlush(io, info) {
51
+ let handling = false;
52
+ const handle = (signal) => {
53
+ if (handling) {
54
+ process.exit(signalExitCode(signal));
55
+ }
56
+ handling = true;
57
+ void (async () => {
58
+ try {
59
+ const { emitAbortedCommandAndShutdown } = await import('./telemetry/index.js');
60
+ await emitAbortedCommandAndShutdown({ packageInfo: info, io });
61
+ }
62
+ catch {
63
+ // Best-effort: never let a telemetry hiccup block the interrupt exit.
64
+ }
65
+ process.exit(signalExitCode(signal));
66
+ })();
67
+ };
68
+ const onSigint = () => handle('SIGINT');
69
+ const onSigterm = () => handle('SIGTERM');
70
+ process.on('SIGINT', onSigint);
71
+ process.on('SIGTERM', onSigterm);
72
+ return () => {
73
+ process.off('SIGINT', onSigint);
74
+ process.off('SIGTERM', onSigterm);
75
+ };
76
+ }
38
77
  export async function runKtxCli(argv = process.argv.slice(2), io = process, deps = {}) {
39
78
  const info = getKtxCliPackageInfo();
40
79
  profileMark('runtime:runKtxCli');
41
80
  const { runCommanderKtxCli } = await profileSpan('import ./cli-program.js', () => import('./cli-program.js'));
42
- return await runCommanderKtxCli(argv, io, deps, info, {
43
- runInit: runInitForCommander,
44
- });
81
+ // Real-process entry only: flush telemetry if interrupted. Test/programmatic
82
+ // callers pass their own `io`, so they never install process-level handlers.
83
+ const removeSignalFlush = io === process ? installTelemetrySignalFlush(io, info) : undefined;
84
+ try {
85
+ return await runCommanderKtxCli(argv, io, deps, info, {
86
+ runInit: runInitForCommander,
87
+ });
88
+ }
89
+ finally {
90
+ removeSignalFlush?.();
91
+ }
45
92
  }
@@ -18,7 +18,7 @@ function embeddingBackend(value) {
18
18
  throw new InvalidArgumentError(`invalid choice '${value}'`);
19
19
  }
20
20
  function llmBackend(value) {
21
- if (value === 'anthropic' || value === 'vertex' || value === 'claude-code') {
21
+ if (value === 'anthropic' || value === 'vertex' || value === 'claude-code' || value === 'codex') {
22
22
  return value;
23
23
  }
24
24
  throw new InvalidArgumentError(`invalid choice '${value}'`);
@@ -0,0 +1,34 @@
1
+ import type { KtxCliIo } from './cli-runtime.js';
2
+ import type { KtxSetupPromptOption } from './setup-prompts.js';
3
+ export type RecoveryOutcome = 'ready' | 'skip' | 'back' | 'failed';
4
+ /** @internal */
5
+ export interface RecoveryAction {
6
+ value: string;
7
+ label: string;
8
+ run: () => Promise<void>;
9
+ }
10
+ export type ConfigureResult = 'configured' | 'back' | 'cancelled';
11
+ export type ValidateResult = {
12
+ status: 'ok';
13
+ } | {
14
+ status: 'back';
15
+ } | {
16
+ status: 'failed';
17
+ extraActions?: RecoveryAction[];
18
+ };
19
+ export interface ConnectionRecoveryInput {
20
+ label: string;
21
+ interactive: boolean;
22
+ allowSkip: boolean;
23
+ io: KtxCliIo;
24
+ prompts: {
25
+ select(options: {
26
+ message: string;
27
+ options: KtxSetupPromptOption[];
28
+ }): Promise<string>;
29
+ };
30
+ snapshot: () => Promise<() => Promise<void>>;
31
+ configure: () => Promise<ConfigureResult>;
32
+ validate: () => Promise<ValidateResult>;
33
+ }
34
+ export declare function runConnectionSetupWithRecovery(input: ConnectionRecoveryInput): Promise<RecoveryOutcome>;
@@ -0,0 +1,82 @@
1
+ async function runRollbackOnce(input) {
2
+ if (input.state.rolledBack) {
3
+ return;
4
+ }
5
+ input.state.rolledBack = true;
6
+ await input.rollback();
7
+ }
8
+ function recoveryOptions(input) {
9
+ return [
10
+ { value: 'retry', label: 'Retry connection test' },
11
+ { value: 're-enter', label: 'Re-enter connection details' },
12
+ ...(input.extraActions ?? []).map((action) => ({
13
+ value: action.value,
14
+ label: action.label,
15
+ })),
16
+ ...(input.allowSkip ? [{ value: 'skip', label: 'Skip this connection' }] : []),
17
+ { value: 'back', label: 'Back' },
18
+ ];
19
+ }
20
+ export async function runConnectionSetupWithRecovery(input) {
21
+ const rollback = await input.snapshot();
22
+ const rollbackState = { rolledBack: false };
23
+ const firstConfig = await input.configure();
24
+ if (firstConfig === 'back') {
25
+ await runRollbackOnce({ rollback, state: rollbackState });
26
+ return 'back';
27
+ }
28
+ if (firstConfig === 'cancelled') {
29
+ await runRollbackOnce({ rollback, state: rollbackState });
30
+ return 'failed';
31
+ }
32
+ let validation = await input.validate();
33
+ while (validation.status !== 'ok') {
34
+ if (validation.status === 'back') {
35
+ await runRollbackOnce({ rollback, state: rollbackState });
36
+ return 'back';
37
+ }
38
+ if (!input.interactive) {
39
+ return 'failed';
40
+ }
41
+ const action = await input.prompts.select({
42
+ message: `Connection setup failed for ${input.label}`,
43
+ options: recoveryOptions({
44
+ allowSkip: input.allowSkip,
45
+ extraActions: validation.extraActions,
46
+ }),
47
+ });
48
+ if (action === 'back') {
49
+ await runRollbackOnce({ rollback, state: rollbackState });
50
+ return 'back';
51
+ }
52
+ if (action === 'skip' && input.allowSkip) {
53
+ await runRollbackOnce({ rollback, state: rollbackState });
54
+ return 'skip';
55
+ }
56
+ if (action === 're-enter') {
57
+ const nextConfig = await input.configure();
58
+ if (nextConfig === 'back') {
59
+ await runRollbackOnce({ rollback, state: rollbackState });
60
+ return 'back';
61
+ }
62
+ if (nextConfig === 'cancelled') {
63
+ await runRollbackOnce({ rollback, state: rollbackState });
64
+ return 'failed';
65
+ }
66
+ validation = await input.validate();
67
+ continue;
68
+ }
69
+ if (action === 'retry') {
70
+ validation = await input.validate();
71
+ continue;
72
+ }
73
+ const extraAction = validation.extraActions?.find((candidate) => candidate.value === action);
74
+ if (extraAction) {
75
+ await extraAction.run();
76
+ validation = await input.validate();
77
+ continue;
78
+ }
79
+ validation = await input.validate();
80
+ }
81
+ return 'ready';
82
+ }
@@ -13,7 +13,7 @@ import { createKtxCliScanConnector } from './local-scan-connectors.js';
13
13
  import { profileMark } from './startup-profile.js';
14
14
  import { isDemoConnection } from './telemetry/demo-detect.js';
15
15
  import { emitTelemetryEvent } from './telemetry/index.js';
16
- import { scrubErrorClass } from './telemetry/scrubber.js';
16
+ import { formatErrorDetail, scrubErrorClass } from './telemetry/scrubber.js';
17
17
  profileMark('module:connection');
18
18
  const SUPPORTED_TEST_DRIVERS = [
19
19
  'sqlite',
@@ -168,6 +168,7 @@ async function testConnectionByDriver(project, connectionId, deps) {
168
168
  }
169
169
  async function emitConnectionTest(input) {
170
170
  const errorClass = input.error ? scrubErrorClass(input.error) : undefined;
171
+ const errorDetail = input.error ? formatErrorDetail(input.error) : undefined;
171
172
  await emitTelemetryEvent({
172
173
  name: 'connection_test',
173
174
  projectDir: input.project.projectDir,
@@ -178,6 +179,7 @@ async function emitConnectionTest(input) {
178
179
  outcome: input.outcome,
179
180
  durationMs: input.durationMs,
180
181
  ...(errorClass ? { errorClass } : {}),
182
+ ...(errorDetail ? { errorDetail } : {}),
181
183
  },
182
184
  });
183
185
  }
@@ -154,27 +154,78 @@ export class BigQueryHistoricSqlQueryHistoryReader {
154
154
  }
155
155
  async *fetchAggregated(client, window, config) {
156
156
  const sql = `
157
+ WITH filtered_jobs AS (
158
+ SELECT
159
+ COALESCE(query_info.query_hashes.normalized_literals, TO_HEX(SHA256(query))) AS template_id,
160
+ query,
161
+ user_email,
162
+ creation_time,
163
+ end_time,
164
+ error_result
165
+ FROM ${this.viewPath}
166
+ WHERE job_type = 'QUERY'
167
+ AND statement_type IN ('SELECT', 'MERGE')
168
+ AND creation_time >= ${timestampExpression(window.start)}
169
+ AND creation_time < ${timestampExpression(window.end)}
170
+ AND query IS NOT NULL
171
+ ),
172
+ template_stats AS (
173
+ SELECT
174
+ template_id,
175
+ MIN(query) AS canonical_sql,
176
+ COUNT(*) AS executions,
177
+ COUNT(DISTINCT user_email) AS distinct_users,
178
+ MIN(creation_time) AS first_seen,
179
+ MAX(creation_time) AS last_seen,
180
+ APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
181
+ APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
182
+ SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
183
+ CAST(NULL AS INT64) AS rows_produced
184
+ FROM filtered_jobs
185
+ GROUP BY template_id
186
+ HAVING COUNT(*) >= ${config.minExecutions}
187
+ ),
188
+ template_users AS (
189
+ SELECT
190
+ template_id,
191
+ user_email AS user,
192
+ COUNT(*) AS executions,
193
+ MAX(creation_time) AS last_seen
194
+ FROM filtered_jobs
195
+ GROUP BY template_id, user_email
196
+ )
157
197
  SELECT
158
- query_hash AS template_id,
159
- MIN(query) AS canonical_sql,
160
- COUNT(*) AS executions,
161
- COUNT(DISTINCT user_email) AS distinct_users,
162
- MIN(creation_time) AS first_seen,
163
- MAX(creation_time) AS last_seen,
164
- APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(50)] AS p50_ms,
165
- APPROX_QUANTILES(TIMESTAMP_DIFF(end_time, creation_time, MILLISECOND), 100)[OFFSET(95)] AS p95_ms,
166
- SAFE_DIVIDE(COUNTIF(error_result IS NOT NULL), COUNT(*)) AS error_rate,
167
- CAST(NULL AS INT64) AS rows_produced,
168
- TO_JSON_STRING(ARRAY_AGG(STRUCT(user_email AS user, 1 AS executions) ORDER BY creation_time DESC LIMIT 5)) AS top_users
169
- FROM ${this.viewPath}
170
- WHERE job_type = 'QUERY'
171
- AND statement_type IN ('SELECT', 'MERGE')
172
- AND creation_time >= ${timestampExpression(window.start)}
173
- AND creation_time < ${timestampExpression(window.end)}
174
- AND query IS NOT NULL
175
- GROUP BY query_hash
176
- HAVING COUNT(*) >= ${config.minExecutions}
177
- ORDER BY executions DESC`.trim();
198
+ stats.template_id,
199
+ stats.canonical_sql,
200
+ stats.executions,
201
+ stats.distinct_users,
202
+ stats.first_seen,
203
+ stats.last_seen,
204
+ stats.p50_ms,
205
+ stats.p95_ms,
206
+ stats.error_rate,
207
+ stats.rows_produced,
208
+ TO_JSON_STRING(
209
+ ARRAY_AGG(
210
+ STRUCT(users.user AS user, users.executions AS executions)
211
+ ORDER BY users.executions DESC, users.last_seen DESC
212
+ )
213
+ ) AS top_users
214
+ FROM template_stats AS stats
215
+ JOIN template_users AS users
216
+ ON users.template_id = stats.template_id
217
+ GROUP BY
218
+ stats.template_id,
219
+ stats.canonical_sql,
220
+ stats.executions,
221
+ stats.distinct_users,
222
+ stats.first_seen,
223
+ stats.last_seen,
224
+ stats.p50_ms,
225
+ stats.p95_ms,
226
+ stats.error_rate,
227
+ stats.rows_produced
228
+ ORDER BY stats.executions DESC`.trim();
178
229
  const result = await queryClient(client).executeQuery(sql);
179
230
  if (result.error) {
180
231
  throw grantsError(result.error);
@@ -1,6 +1,7 @@
1
1
  import { createHash } from 'node:crypto';
2
2
  import { readFile, readdir } from 'node:fs/promises';
3
3
  import { join, relative } from 'node:path';
4
+ import { tableRefKey } from '../../../scan/table-ref.js';
4
5
  import { isHistoricSqlPatternInputShardPath } from './pattern-inputs.js';
5
6
  import { stagedManifestSchema, stagedPatternsInputSchema, stagedTableInputSchema } from './types.js';
6
7
  async function walk(root) {
@@ -30,7 +31,7 @@ export async function chunkHistoricSqlUnifiedStagedDir(stagedDir, diffSet) {
30
31
  }
31
32
  const table = stagedTableInputSchema.parse(await readJson(stagedDir, path));
32
33
  workUnits.push({
33
- unitKey: `historic-sql-table-${safeUnitKey(table.table)}`,
34
+ unitKey: `historic-sql-table-${safeUnitKey(tableRefKey(table.tableRef))}`,
34
35
  displayLabel: `Historic SQL usage: ${table.table}`,
35
36
  rawFiles: [path],
36
37
  dependencyPaths: ['manifest.json'],
@@ -1,5 +1,14 @@
1
1
  import type { HistoricSqlDialect } from './types.js';
2
2
  export declare function isQueryHistoryEnabled(connection: unknown): boolean;
3
+ /**
4
+ * Resolves the query-history dialect from the connection's driver capability
5
+ * alone, ignoring whether query history is enabled in ktx.yaml. Use this on the
6
+ * adapter-registration path when query history has been explicitly requested
7
+ * for the run (e.g. via `--query-history`, which is itself the opt-in): the
8
+ * persisted `context.queryHistory.enabled` flag must not gate registration.
9
+ * Returns null when the connection's driver has no query-history reader.
10
+ */
11
+ export declare function historicSqlDialectForConnectionDriver(connection: unknown): HistoricSqlDialect | null;
3
12
  /**
4
13
  * Resolves the query-history dialect for a connection. Returns null when
5
14
  * query history is disabled, or when the connection's driver has no
@@ -18,6 +18,20 @@ function historicSqlDialectForDriver(driver) {
18
18
  export function isQueryHistoryEnabled(connection) {
19
19
  return queryHistoryRecord(connection)?.enabled === true;
20
20
  }
21
+ /**
22
+ * Resolves the query-history dialect from the connection's driver capability
23
+ * alone, ignoring whether query history is enabled in ktx.yaml. Use this on the
24
+ * adapter-registration path when query history has been explicitly requested
25
+ * for the run (e.g. via `--query-history`, which is itself the opt-in): the
26
+ * persisted `context.queryHistory.enabled` flag must not gate registration.
27
+ * Returns null when the connection's driver has no query-history reader.
28
+ */
29
+ export function historicSqlDialectForConnectionDriver(connection) {
30
+ const conn = recordOrNull(connection);
31
+ const driver = String(conn?.driver ?? '').toLowerCase();
32
+ const registration = getDriverRegistration(driver);
33
+ return registration?.hasHistoricSqlReader ? historicSqlDialectForDriver(registration.driver) : null;
34
+ }
21
35
  /**
22
36
  * Resolves the query-history dialect for a connection. Returns null when
23
37
  * query history is disabled, or when the connection's driver has no
@@ -27,8 +41,5 @@ export function queryHistoryDialectForConnection(connection) {
27
41
  if (!isQueryHistoryEnabled(connection)) {
28
42
  return null;
29
43
  }
30
- const conn = recordOrNull(connection);
31
- const driver = String(conn?.driver ?? '').toLowerCase();
32
- const registration = getDriverRegistration(driver);
33
- return registration?.hasHistoricSqlReader ? historicSqlDialectForDriver(registration.driver) : null;
44
+ return historicSqlDialectForConnectionDriver(connection);
34
45
  }
@@ -1,4 +1,5 @@
1
1
  import { Buffer } from 'node:buffer';
2
+ import { tableRefKey } from '../../../scan/table-ref.js';
2
3
  const HISTORIC_SQL_PATTERN_WORKUNIT_DIR = 'patterns-input';
3
4
  /** @internal */
4
5
  export const HISTORIC_SQL_PATTERN_WORKUNIT_MAX_BYTES = 110_000;
@@ -19,12 +20,17 @@ function sortedAuditTemplates(templates) {
19
20
  function sortedPatternCandidates(templates) {
20
21
  return [...templates]
21
22
  .filter((template) => template.tablesTouched.length >= 2)
22
- .map((template) => ({ ...template, tablesTouched: [...template.tablesTouched].sort() }))
23
+ .map((template) => ({
24
+ ...template,
25
+ tablesTouched: [...template.tablesTouched].sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right))),
26
+ }))
23
27
  .sort((left, right) => {
24
28
  const cardinality = right.tablesTouched.length - left.tablesTouched.length;
25
29
  if (cardinality !== 0)
26
30
  return cardinality;
27
- const tableSignature = left.tablesTouched.join('\0').localeCompare(right.tablesTouched.join('\0'));
31
+ const leftSignature = left.tablesTouched.map(tableRefKey).join('\0');
32
+ const rightSignature = right.tablesTouched.map(tableRefKey).join('\0');
33
+ const tableSignature = leftSignature.localeCompare(rightSignature);
28
34
  if (tableSignature !== 0)
29
35
  return tableSignature;
30
36
  return left.id.localeCompare(right.id);
@@ -0,0 +1,29 @@
1
+ import type { KtxLlmRuntimePort } from '../../../../context/llm/runtime-port.js';
2
+ import type { SqlAnalysisPort } from '../../../../context/sql-analysis/ports.js';
3
+ import { type HistoricSqlDialect, type HistoricSqlReader } from './types.js';
4
+ export interface QueryHistoryFilterProposal {
5
+ excludedRoles: Array<{
6
+ role: string;
7
+ reason: string;
8
+ pattern: string;
9
+ }>;
10
+ consideredRoleCount: number;
11
+ skipped: {
12
+ reason: 'no-llm' | 'no-daemon' | 'no-in-scope-history' | 'user-block-present';
13
+ } | null;
14
+ warnings: string[];
15
+ }
16
+ export interface ProposeQueryHistoryServiceAccountFiltersInput {
17
+ connectionId: string;
18
+ dialect: HistoricSqlDialect;
19
+ queryClient: unknown;
20
+ reader: HistoricSqlReader;
21
+ sqlAnalysis: SqlAnalysisPort;
22
+ llmRuntime: KtxLlmRuntimePort | null;
23
+ pullConfig: unknown;
24
+ now?: Date;
25
+ userServiceAccountsPresent?: boolean;
26
+ }
27
+ /** @internal */
28
+ export declare function regexEscapeForExactRolePattern(role: string): string;
29
+ export declare function proposeQueryHistoryServiceAccountFilters(input: ProposeQueryHistoryServiceAccountFiltersInput): Promise<QueryHistoryFilterProposal>;
@@ -0,0 +1,190 @@
1
+ import { z } from 'zod';
2
+ import { tableRefKey } from '../../../scan/table-ref.js';
3
+ import { bucketDistinctUsers, bucketExecutions, bucketRecency } from './buckets.js';
4
+ import { compileHistoricSqlRedactionPatterns, redactHistoricSqlText, } from './redaction.js';
5
+ import { includedQueryHistoryTableRefs } from './scope-membership.js';
6
+ import { aggregatedTemplateSchema, historicSqlUnifiedPullConfigSchema, } from './types.js';
7
+ const queryHistoryFilterAdjudicationSchema = z.object({
8
+ roles: z.array(z.object({
9
+ role: z.string().min(1),
10
+ exclude: z.boolean(),
11
+ reason: z.string().min(1),
12
+ }).strict()),
13
+ }).strict();
14
+ function emptyProposal(skipped, warnings = []) {
15
+ return { excludedRoles: [], consideredRoleCount: 0, skipped, warnings };
16
+ }
17
+ function displayTableRef(ref) {
18
+ return [ref.catalog, ref.db, ref.name].filter((part) => !!part && part.length > 0).join('.');
19
+ }
20
+ function redactTemplateSqlForPicker(template, redactors) {
21
+ if (redactors.length === 0) {
22
+ return template;
23
+ }
24
+ return {
25
+ ...template,
26
+ canonicalSql: redactHistoricSqlText(template.canonicalSql, redactors),
27
+ };
28
+ }
29
+ /** @internal */
30
+ export function regexEscapeForExactRolePattern(role) {
31
+ return `^${role.replace(/[\\^$.*+?()[\]{}|]/g, '\\$&')}$`;
32
+ }
33
+ function recordRole(acc, template, tables, executions) {
34
+ acc.executions += executions;
35
+ acc.distinctUsers = Math.max(acc.distinctUsers, template.stats.distinctUsers);
36
+ acc.lastSeen = template.stats.lastSeen > acc.lastSeen ? template.stats.lastSeen : acc.lastSeen;
37
+ for (const table of tables) {
38
+ acc.tables.set(tableRefKey(table), table);
39
+ }
40
+ acc.templates.push(template);
41
+ }
42
+ function roleRecords(parsedTemplates, now) {
43
+ const byRole = new Map();
44
+ for (const parsed of parsedTemplates) {
45
+ for (const entry of parsed.template.topUsers) {
46
+ if (!entry.user || entry.user.trim().length === 0 || entry.executions <= 0) {
47
+ continue;
48
+ }
49
+ const role = entry.user.trim();
50
+ const acc = byRole.get(role) ??
51
+ {
52
+ role,
53
+ executions: 0,
54
+ distinctUsers: 0,
55
+ lastSeen: '1970-01-01T00:00:00.000Z',
56
+ tables: new Map(),
57
+ templates: [],
58
+ };
59
+ recordRole(acc, parsed.template, parsed.includedTables, entry.executions);
60
+ byRole.set(role, acc);
61
+ }
62
+ }
63
+ return [...byRole.values()]
64
+ .sort((left, right) => right.executions - left.executions || left.role.localeCompare(right.role))
65
+ .map((acc) => ({
66
+ role: acc.role,
67
+ inScopeTables: [...acc.tables.entries()]
68
+ .sort(([left], [right]) => left.localeCompare(right))
69
+ .slice(0, 25)
70
+ .map(([, ref]) => displayTableRef(ref)),
71
+ executionsBucket: bucketExecutions(acc.executions),
72
+ distinctUsersBucket: bucketDistinctUsers(acc.distinctUsers),
73
+ recencyBucket: bucketRecency(acc.lastSeen, now),
74
+ representativeTemplates: [...acc.templates]
75
+ .sort((left, right) => right.stats.executions - left.stats.executions || left.templateId.localeCompare(right.templateId))
76
+ .slice(0, 3)
77
+ .map((template) => ({
78
+ id: template.templateId,
79
+ canonicalSql: template.canonicalSql,
80
+ dialect: template.dialect,
81
+ })),
82
+ }));
83
+ }
84
+ function adjudicationSystemPrompt() {
85
+ return [
86
+ 'You are helping ktx decide whether observed query-history roles are operational service accounts.',
87
+ 'Default every role to keep. Mark exclude true only when the aggregate evidence clearly shows loader, ELT, reverse-ETL, export, refresh, or maintenance traffic rather than analyst or BI-dashboard usage.',
88
+ 'Use only the observed role records. Do not rely on a hardcoded denylist. Return structured output only.',
89
+ ].join('\n');
90
+ }
91
+ export async function proposeQueryHistoryServiceAccountFilters(input) {
92
+ if (!input.llmRuntime) {
93
+ return emptyProposal({ reason: 'no-llm' });
94
+ }
95
+ const config = historicSqlUnifiedPullConfigSchema.parse(input.pullConfig);
96
+ const redactors = compileHistoricSqlRedactionPatterns(config.redactionPatterns);
97
+ const now = input.now ?? new Date();
98
+ const windowDays = 'windowDays' in config ? config.windowDays : 90;
99
+ const windowStart = new Date(now.getTime() - windowDays * 24 * 60 * 60 * 1000);
100
+ const warnings = [];
101
+ const snapshot = [];
102
+ try {
103
+ for await (const row of input.reader.fetchAggregated(input.queryClient, { start: windowStart, end: now }, config)) {
104
+ snapshot.push(aggregatedTemplateSchema.parse(row));
105
+ }
106
+ }
107
+ catch (error) {
108
+ return emptyProposal(null, [
109
+ `query_history_filter_picker_read_failed:${error instanceof Error ? error.message : String(error)}`,
110
+ ]);
111
+ }
112
+ if (snapshot.length === 0) {
113
+ return emptyProposal({ reason: 'no-in-scope-history' });
114
+ }
115
+ const analysisItems = snapshot.map((template) => ({ id: template.templateId, sql: template.canonicalSql }));
116
+ const analysisOptions = config.modeledTableCatalog.length > 0 ? { catalog: { tables: config.modeledTableCatalog } } : undefined;
117
+ let analysis;
118
+ try {
119
+ analysis = await input.sqlAnalysis.analyzeBatch(analysisItems, input.dialect, analysisOptions);
120
+ }
121
+ catch (error) {
122
+ return emptyProposal({ reason: 'no-daemon' }, [
123
+ `query_history_filter_picker_analysis_failed:${error instanceof Error ? error.message : String(error)}`,
124
+ ]);
125
+ }
126
+ const parsedTemplates = [];
127
+ for (const template of snapshot) {
128
+ const parsed = analysis.get(template.templateId);
129
+ if (!parsed || parsed.error) {
130
+ warnings.push(`query_history_filter_picker_parse_failed:${template.templateId}`);
131
+ continue;
132
+ }
133
+ const tablesTouched = [...new Map(parsed.tablesTouched.map((ref) => [tableRefKey(ref), ref])).values()]
134
+ .filter((ref) => ref.name.length > 0)
135
+ .sort((left, right) => tableRefKey(left).localeCompare(tableRefKey(right)));
136
+ const includedTables = includedQueryHistoryTableRefs(tablesTouched, config);
137
+ if (includedTables.length === 0) {
138
+ continue;
139
+ }
140
+ parsedTemplates.push({
141
+ template: redactTemplateSqlForPicker(template, redactors),
142
+ tablesTouched,
143
+ includedTables,
144
+ });
145
+ }
146
+ const records = roleRecords(parsedTemplates, now);
147
+ if (records.length <= 1) {
148
+ return {
149
+ excludedRoles: [],
150
+ consideredRoleCount: records.length,
151
+ skipped: { reason: 'no-in-scope-history' },
152
+ warnings,
153
+ };
154
+ }
155
+ let generated;
156
+ try {
157
+ generated = await input.llmRuntime.generateObject({
158
+ role: 'candidateExtraction',
159
+ system: adjudicationSystemPrompt(),
160
+ prompt: JSON.stringify({ connectionId: input.connectionId, dialect: input.dialect, roles: records }),
161
+ schema: queryHistoryFilterAdjudicationSchema,
162
+ });
163
+ }
164
+ catch (error) {
165
+ return {
166
+ excludedRoles: [],
167
+ consideredRoleCount: records.length,
168
+ skipped: { reason: 'no-llm' },
169
+ warnings: [
170
+ ...warnings,
171
+ `query_history_filter_picker_llm_failed:${error instanceof Error ? error.message : String(error)}`,
172
+ ],
173
+ };
174
+ }
175
+ const knownRoles = new Set(records.map((record) => record.role));
176
+ const excludedRoles = generated.roles
177
+ .filter((role) => role.exclude && knownRoles.has(role.role))
178
+ .sort((left, right) => left.role.localeCompare(right.role))
179
+ .map((role) => ({
180
+ role: role.role,
181
+ reason: role.reason,
182
+ pattern: regexEscapeForExactRolePattern(role.role),
183
+ }));
184
+ return {
185
+ excludedRoles,
186
+ consideredRoleCount: records.length,
187
+ skipped: input.userServiceAccountsPresent ? { reason: 'user-block-present' } : null,
188
+ warnings,
189
+ };
190
+ }
@@ -0,0 +1,18 @@
1
+ import { type KtxTableRefKey } from '../../../scan/table-ref.js';
2
+ import type { KtxTableRef } from '../../../scan/types.js';
3
+ export interface QueryHistoryScopeFloorInput {
4
+ projectDir: string;
5
+ connectionId: string;
6
+ driver: string;
7
+ connection: Record<string, unknown>;
8
+ storedQueryHistory: Record<string, unknown>;
9
+ }
10
+ export interface QueryHistoryScopeFloor {
11
+ enabledTables: KtxTableRef[];
12
+ enabledTableKeys: ReadonlySet<KtxTableRefKey> | null;
13
+ enabledSchemas: string[];
14
+ modeledTableCatalog: KtxTableRef[];
15
+ floorDisabled: boolean;
16
+ warnings: string[];
17
+ }
18
+ export declare function resolveQueryHistoryScopeFloor(input: QueryHistoryScopeFloorInput): Promise<QueryHistoryScopeFloor>;