@kaelio/ktx 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/python/{kaelio_ktx-0.12.0-py3-none-any.whl → kaelio_ktx-0.13.0-py3-none-any.whl} +0 -0
- package/assets/python/manifest.json +4 -4
- package/dist/.tsbuildinfo +1 -1
- package/dist/commands/setup-commands.js +13 -0
- package/dist/connection.js +14 -2
- package/dist/connectors/bigquery/connector.js +1 -14
- package/dist/connectors/clickhouse/connector.js +1 -15
- package/dist/connectors/duckdb/federated-attach.d.ts +7 -0
- package/dist/connectors/duckdb/federated-attach.js +86 -0
- package/dist/connectors/duckdb/federated-executor.d.ts +5 -0
- package/dist/connectors/duckdb/federated-executor.js +59 -0
- package/dist/connectors/mysql/connector.js +1 -15
- package/dist/connectors/postgres/connector.js +1 -14
- package/dist/connectors/shared/string-reference.d.ts +6 -0
- package/dist/connectors/shared/string-reference.js +19 -0
- package/dist/connectors/snowflake/connector.js +1 -14
- package/dist/connectors/sqlserver/connector.js +1 -14
- package/dist/context/connections/federation.d.ts +33 -0
- package/dist/context/connections/federation.js +51 -0
- package/dist/context/connections/local-warehouse-descriptor.d.ts +2 -0
- package/dist/context/connections/project-sql-executor.d.ts +18 -0
- package/dist/context/connections/project-sql-executor.js +39 -0
- package/dist/context/connections/query-executor.d.ts +2 -2
- package/dist/context/connections/read-only-sql.js +4 -3
- package/dist/context/connections/resolve-connection.d.ts +12 -0
- package/dist/context/connections/resolve-connection.js +37 -0
- package/dist/context/core/git-env.d.ts +4 -0
- package/dist/context/core/git-env.js +5 -1
- package/dist/context/ingest/adapters/live-database/manifest.d.ts +3 -0
- package/dist/context/ingest/adapters/live-database/manifest.js +19 -11
- package/dist/context/llm/claude-code-runtime.js +18 -2
- package/dist/context/mcp/context-tools.js +27 -2
- package/dist/context/mcp/local-project-ports.js +55 -50
- package/dist/context/mcp/types.d.ts +2 -0
- package/dist/context/scan/local-enrichment-artifacts.js +31 -3
- package/dist/context/sl/local-query.js +29 -12
- package/dist/context/sl/local-sl.js +27 -1
- package/dist/context/sl/source-files.d.ts +2 -0
- package/dist/context/sl/source-files.js +7 -0
- package/dist/ingest-query-executor.d.ts +2 -0
- package/dist/ingest-query-executor.js +8 -22
- package/dist/setup-agents.d.ts +21 -15
- package/dist/setup-agents.js +128 -42
- package/dist/setup-databases.d.ts +3 -0
- package/dist/setup-databases.js +16 -0
- package/dist/setup-sources.js +1 -5
- package/dist/setup.d.ts +1 -0
- package/dist/setup.js +1 -0
- package/dist/sql.d.ts +2 -0
- package/dist/sql.js +35 -53
- package/dist/telemetry/events.d.ts +2 -1
- package/dist/telemetry/events.js +11 -1
- package/package.json +2 -1
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { KtxProjectConfig, KtxProjectConnectionConfig } from '../project/config.js';
|
|
2
|
+
/**
|
|
3
|
+
* Look up a connection by id, throwing an expected (caller-driven) error that
|
|
4
|
+
* names the configured connections so an agent or CLI user can self-correct.
|
|
5
|
+
*/
|
|
6
|
+
export declare function resolveConfiguredConnection(config: KtxProjectConfig, connectionId: string): KtxProjectConnectionConfig;
|
|
7
|
+
/**
|
|
8
|
+
* Resolve the connection id to run against: validate a requested id against the
|
|
9
|
+
* configured connections, or default to the sole connection when none is given.
|
|
10
|
+
* Throws an expected error that lists the configured connections otherwise.
|
|
11
|
+
*/
|
|
12
|
+
export declare function resolveRequiredConnectionId(config: KtxProjectConfig, requested: string | undefined): string;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { KtxExpectedError } from '../../errors.js';
|
|
2
|
+
/**
 * List the ids of every connection declared in the project config,
 * sorted with the default string ordering so messages are stable.
 */
function configuredConnectionIds(config) {
    const ids = Object.keys(config.connections);
    ids.sort();
    return ids;
}
|
|
5
|
+
/**
 * Build the trailing sentence appended to connection-resolution errors:
 * either the sorted list of configured connection ids, or a note that
 * ktx.yaml declares none.
 */
function availableConnectionsHint(config) {
    const ids = configuredConnectionIds(config);
    if (ids.length > 0) {
        return `Configured connections: ${ids.join(', ')}.`;
    }
    return 'No connections are configured in ktx.yaml.';
}
|
|
11
|
+
/**
 * Look up a connection by id, throwing an expected (caller-driven) error that
 * names the configured connections so an agent or CLI user can self-correct.
 *
 * Only the config's own keys count as configured connections. A bare property
 * read would also see inherited Object.prototype members, so an id such as
 * "constructor" or "toString" would come back truthy and be returned as if it
 * were a connection instead of producing the self-correcting error.
 *
 * @param config Project config whose `connections` map is searched.
 * @param connectionId Id of the connection to resolve.
 * @returns The connection config stored under `connectionId`.
 * @throws KtxExpectedError when `connectionId` is not a configured connection.
 */
export function resolveConfiguredConnection(config, connectionId) {
    const isConfigured = Object.prototype.hasOwnProperty.call(config.connections, connectionId);
    const connection = isConfigured ? config.connections[connectionId] : undefined;
    if (!connection) {
        throw new KtxExpectedError(`Connection "${connectionId}" is not configured in ktx.yaml. ${availableConnectionsHint(config)}`);
    }
    return connection;
}
|
|
22
|
+
/**
 * Decide which connection id a command should run against.
 *
 * An explicitly requested id is validated against the configured connections
 * and returned unchanged. When no id is requested, the sole configured
 * connection is used; with zero or several connections an expected error
 * listing the configured ids is thrown instead.
 */
export function resolveRequiredConnectionId(config, requested) {
    if (requested === undefined) {
        const ids = configuredConnectionIds(config);
        if (ids.length !== 1) {
            throw new KtxExpectedError(`connectionId is required. ${availableConnectionsHint(config)}`);
        }
        return ids[0];
    }
    // Called for its validation side effect only: it throws on an unknown id.
    resolveConfiguredConnection(config, requested);
    return requested;
}
|
|
@@ -6,6 +6,10 @@ import { type SimpleGit } from 'simple-git';
|
|
|
6
6
|
* directory is an existing repo ktx did not create and the machine has no configured git
|
|
7
7
|
* identity (e.g. a fresh Mac with no ~/.gitconfig), without mutating the user's repo config.
|
|
8
8
|
* Explicit `--author` flags on individual commits still take precedence over GIT_AUTHOR_NAME.
|
|
9
|
+
*
|
|
10
|
+
* `commit.gpgsign=false` is injected as a per-invocation `-c` override so ktx's commits never
|
|
11
|
+
* attempt GPG signing: ktx commits under a synthetic identity that can never own a secret key, so
|
|
12
|
+
* a user's `commit.gpgsign=true` would otherwise fail every commit with "No secret key".
|
|
9
13
|
*/
|
|
10
14
|
export declare function createSimpleGit(baseDir: string, identity?: {
|
|
11
15
|
name: string;
|
|
@@ -28,6 +28,10 @@ function sanitizedGitEnv(env = process.env) {
|
|
|
28
28
|
* directory is an existing repo ktx did not create and the machine has no configured git
|
|
29
29
|
* identity (e.g. a fresh Mac with no ~/.gitconfig), without mutating the user's repo config.
|
|
30
30
|
* Explicit `--author` flags on individual commits still take precedence over GIT_AUTHOR_NAME.
|
|
31
|
+
*
|
|
32
|
+
* `commit.gpgsign=false` is injected as a per-invocation `-c` override so ktx's commits never
|
|
33
|
+
* attempt GPG signing: ktx commits under a synthetic identity that can never own a secret key, so
|
|
34
|
+
* a user's `commit.gpgsign=true` would otherwise fail every commit with "No secret key".
|
|
31
35
|
*/
|
|
32
36
|
export function createSimpleGit(baseDir, identity) {
|
|
33
37
|
const env = sanitizedGitEnv();
|
|
@@ -37,5 +41,5 @@ export function createSimpleGit(baseDir, identity) {
|
|
|
37
41
|
env.GIT_COMMITTER_NAME = identity.name;
|
|
38
42
|
env.GIT_COMMITTER_EMAIL = identity.email;
|
|
39
43
|
}
|
|
40
|
-
return simpleGit({ baseDir, unsafe: { allowUnsafeAskPass: true } }).env(env);
|
|
44
|
+
return simpleGit({ baseDir, config: ['commit.gpgsign=false'], unsafe: { allowUnsafeAskPass: true } }).env(env);
|
|
41
45
|
}
|
|
@@ -56,11 +56,14 @@ export interface BuildLiveDatabaseManifestShardsInput {
|
|
|
56
56
|
existingPreservedJoins?: Map<string, LiveDatabaseManifestJoinEntry[]>;
|
|
57
57
|
existingDescriptions?: Map<string, LiveDatabaseManifestExistingDescriptions>;
|
|
58
58
|
existingUsage?: Map<string, TableUsageOutput>;
|
|
59
|
+
federatedSiblingTargets?: Set<string>;
|
|
59
60
|
}
|
|
60
61
|
export interface BuildLiveDatabaseManifestShardsResult {
|
|
61
62
|
shards: Map<string, LiveDatabaseManifestShard>;
|
|
62
63
|
tablesProcessed: number;
|
|
63
64
|
}
|
|
64
65
|
export declare function mergeUsagePreservingExternal(existing: TableUsageOutput | undefined, incoming: TableUsageOutput | undefined): TableUsageOutput | undefined;
|
|
66
|
+
/** @internal */
|
|
67
|
+
export declare function buildJoinsByTable(tableNames: Set<string>, joins: LiveDatabaseManifestJoinData[], preservedJoins: Map<string, LiveDatabaseManifestJoinEntry[]>, federatedSiblingTargets?: Set<string>): Map<string, LiveDatabaseManifestJoinEntry[]>;
|
|
65
68
|
export declare function buildLiveDatabaseManifestShards(input: BuildLiveDatabaseManifestShardsInput): BuildLiveDatabaseManifestShardsResult;
|
|
66
69
|
export {};
|
|
@@ -106,10 +106,14 @@ function joinCondition(leftTable, leftColumns, rightTable, rightColumns) {
|
|
|
106
106
|
})
|
|
107
107
|
.join(' AND ');
|
|
108
108
|
}
|
|
109
|
-
|
|
109
|
+
/** @internal */
|
|
110
|
+
export function buildJoinsByTable(tableNames, joins, preservedJoins, federatedSiblingTargets = new Set()) {
|
|
110
111
|
const joinsByTable = new Map();
|
|
111
112
|
for (const join of joins) {
|
|
112
|
-
|
|
113
|
+
const fromLocal = tableNames.has(join.fromTable);
|
|
114
|
+
const toLocal = tableNames.has(join.toTable);
|
|
115
|
+
const toSibling = federatedSiblingTargets.has(join.toTable);
|
|
116
|
+
if (!fromLocal || (!toLocal && !toSibling)) {
|
|
113
117
|
continue;
|
|
114
118
|
}
|
|
115
119
|
const relationship = RELATIONSHIP_MAP[join.relationship] ?? join.relationship;
|
|
@@ -119,20 +123,24 @@ function buildJoinsByTable(tableNames, joins, preservedJoins) {
|
|
|
119
123
|
relationship,
|
|
120
124
|
source: join.source,
|
|
121
125
|
});
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
126
|
+
// Reverse direction only when the target is a local table in THIS snapshot;
|
|
127
|
+
// a federated sibling has no shard here, so it gets no reverse entry.
|
|
128
|
+
if (toLocal) {
|
|
129
|
+
const reverseRelationship = RELATIONSHIP_INVERSE[relationship] ?? 'one_to_many';
|
|
130
|
+
addJoinOnce(joinsByTable, join.toTable, {
|
|
131
|
+
to: join.fromTable,
|
|
132
|
+
on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns),
|
|
133
|
+
relationship: reverseRelationship,
|
|
134
|
+
source: join.source,
|
|
135
|
+
});
|
|
136
|
+
}
|
|
129
137
|
}
|
|
130
138
|
for (const [tableName, tableJoins] of preservedJoins) {
|
|
131
139
|
if (!tableNames.has(tableName)) {
|
|
132
140
|
continue;
|
|
133
141
|
}
|
|
134
142
|
for (const join of tableJoins) {
|
|
135
|
-
if (tableNames.has(join.to)) {
|
|
143
|
+
if (tableNames.has(join.to) || federatedSiblingTargets.has(join.to)) {
|
|
136
144
|
addJoinOnce(joinsByTable, tableName, join);
|
|
137
145
|
}
|
|
138
146
|
}
|
|
@@ -141,7 +149,7 @@ function buildJoinsByTable(tableNames, joins, preservedJoins) {
|
|
|
141
149
|
}
|
|
142
150
|
export function buildLiveDatabaseManifestShards(input) {
|
|
143
151
|
const tableNames = new Set(input.tables.map((table) => table.name));
|
|
144
|
-
const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map());
|
|
152
|
+
const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map(), input.federatedSiblingTargets ?? new Set());
|
|
145
153
|
const shards = new Map();
|
|
146
154
|
for (const table of input.tables) {
|
|
147
155
|
const shardKey = getShardKey(input.connectionType, table.catalog, table.db);
|
|
@@ -89,7 +89,23 @@ function assertInitIsolation(message, allowedToolIds, expectedMcpServerNames) {
|
|
|
89
89
|
/**
 * Return the set of MCP server names expected for the given tool map:
 * the ktx server name when at least one tool is present, otherwise an
 * empty set.
 */
function expectedMcpServerNames(tools) {
    const names = new Set();
    if (tools && Object.keys(tools).length > 0) {
        names.add(KTX_MCP_SERVER_NAME);
    }
    return names;
}
|
|
92
|
-
|
|
92
|
+
// Markers for throttling-style failures: the Claude Code subscription cap
// ("session limit" / "usage limit") plus transient 429-style rate limiting.
// Any of these means Claude Code did authenticate, so they must not be
// reported as authentication failures.
const CLAUDE_RATE_LIMIT_ERROR_MARKERS = /\b429\b|rate limit|session limit|usage limit|too many requests|quota exceeded|overloaded|max_retries/i;
// Subset of the markers above that identifies the subscription cap. Neither
// re-authenticating nor retrying helps until the cap resets, so it is worded
// differently from transient rate limiting.
const CLAUDE_SESSION_LIMIT_MARKERS = /session limit|usage limit/i;
/**
 * Turn a raw auth-probe failure message into user-facing guidance.
 *
 * The subscription cap is checked first because its markers also match the
 * broader rate-limit pattern; anything matching neither pattern is reported
 * as an authentication problem.
 */
function describeClaudeProbeFailure(message) {
    const hitSessionCap = CLAUDE_SESSION_LIMIT_MARKERS.test(message);
    if (hitSessionCap) {
        return `Claude Code session limit reached. Wait for the reset shown, then rerun setup or the command. Details: ${message}`;
    }
    return CLAUDE_RATE_LIMIT_ERROR_MARKERS.test(message)
        ? `Claude Code is rate limited. Retry shortly, then rerun setup or the command. Details: ${message}`
        : `Claude Code authentication is not usable. Authenticate Claude Code locally with the Claude Code CLI, then rerun setup or the command. ${message}`;
}
|
|
93
109
|
function normalizeClaudeResetAtMs(value) {
|
|
94
110
|
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
95
111
|
return Math.round(value < 10_000_000_000 ? value * 1_000 : value);
|
|
@@ -402,7 +418,7 @@ export async function runClaudeCodeAuthProbe(input) {
|
|
|
402
418
|
const message = error instanceof Error ? error.message : String(error);
|
|
403
419
|
return {
|
|
404
420
|
ok: false,
|
|
405
|
-
message:
|
|
421
|
+
message: describeClaudeProbeFailure(message),
|
|
406
422
|
};
|
|
407
423
|
}
|
|
408
424
|
}
|
|
@@ -2,7 +2,7 @@ import { randomUUID } from 'node:crypto';
|
|
|
2
2
|
import { z } from 'zod';
|
|
3
3
|
import { emitTelemetryEvent, mcpTelemetrySampleRate, reportException, shouldEmitMcpTelemetry, } from '../../telemetry/index.js';
|
|
4
4
|
import { collectTelemetryRedactionSecrets } from '../../telemetry/redaction-secrets.js';
|
|
5
|
-
import { scrubErrorClass } from '../../telemetry/scrubber.js';
|
|
5
|
+
import { formatErrorDetail, scrubErrorClass } from '../../telemetry/scrubber.js';
|
|
6
6
|
const connectionIdSchema = z.string().min(1);
|
|
7
7
|
const unknownRecordSchema = z.record(z.string(), z.unknown());
|
|
8
8
|
const tableRefSchema = z.object({
|
|
@@ -24,7 +24,7 @@ const toolAnnotations = {
|
|
|
24
24
|
memory_ingest_status: { title: 'Memory Ingest Status', readOnlyHint: true, openWorldHint: false },
|
|
25
25
|
};
|
|
26
26
|
const toolDescriptions = {
|
|
27
|
-
connection_list: 'List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses.',
|
|
27
|
+
connection_list: 'List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses. A "_ktx_federated" entry (when present) queries all its member databases together; use its id for cross-database joins.',
|
|
28
28
|
discover_data: 'Search across ktx wiki pages, semantic-layer sources, measures, dimensions, raw tables, and columns. Example: discover_data({ query: "monthly orders by customer", connectionId: "warehouse", kinds: ["sl_source", "table"] }).',
|
|
29
29
|
wiki_search: 'Search ktx wiki pages for reusable business context. Example: wiki_search({ query: "revenue recognition", limit: 5 }).',
|
|
30
30
|
wiki_read: 'Read a ktx wiki page by key returned from wiki_search. Example: wiki_read({ key: "global/revenue" }).',
|
|
@@ -160,6 +160,8 @@ const connectionListOutputSchema = z.object({
|
|
|
160
160
|
id: z.string(),
|
|
161
161
|
name: z.string(),
|
|
162
162
|
connectionType: z.string(),
|
|
163
|
+
members: z.array(z.string()).optional(),
|
|
164
|
+
hint: z.string().optional(),
|
|
163
165
|
})),
|
|
164
166
|
});
|
|
165
167
|
const wikiSearchOutputSchema = z.object({
|
|
@@ -442,6 +444,25 @@ function clientTelemetryFields(getClientInfo) {
|
|
|
442
444
|
...(client?.version ? { mcpClientVersion: client.version } : {}),
|
|
443
445
|
};
|
|
444
446
|
}
|
|
447
|
+
// Handlers registered through registerParsedTool catch their own failures and
// hand back an isError result, so no thrown Error reaches the telemetry layer.
// This rebuilds the failure text from the result's content blocks so the
// outcome event still carries a diagnostic.
/**
 * Extract an error detail from an MCP tool result.
 *
 * Returns undefined when the result is not an object carrying an array
 * `content`. Otherwise each block contributes its string `text` (or an empty
 * string when it has none), the pieces are joined with newlines, and the
 * joined text is passed through formatErrorDetail.
 */
function mcpErrorResultDetail(result) {
    const carriesContent = typeof result === 'object' && result !== null && 'content' in result;
    if (!carriesContent) {
        return undefined;
    }
    const blocks = result.content;
    if (!Array.isArray(blocks)) {
        return undefined;
    }
    const pieces = [];
    for (const block of blocks) {
        const isTextBlock = typeof block === 'object' && block !== null && typeof block.text === 'string';
        pieces.push(isTextBlock ? block.text : '');
    }
    return formatErrorDetail(pieces.join('\n'));
}
|
|
445
466
|
function instrumentMcpServer(server, telemetry) {
|
|
446
467
|
return {
|
|
447
468
|
registerTool(name, config, handler) {
|
|
@@ -451,6 +472,7 @@ function instrumentMcpServer(server, telemetry) {
|
|
|
451
472
|
const result = await handler(input, context);
|
|
452
473
|
if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) {
|
|
453
474
|
const isError = typeof result === 'object' && result !== null && 'isError' in result && result.isError === true;
|
|
475
|
+
const errorDetail = isError ? mcpErrorResultDetail(result) : undefined;
|
|
454
476
|
await emitTelemetryEvent({
|
|
455
477
|
name: 'mcp_request_completed',
|
|
456
478
|
projectDir: telemetry.projectDir,
|
|
@@ -460,6 +482,7 @@ function instrumentMcpServer(server, telemetry) {
|
|
|
460
482
|
outcome: isError ? 'error' : 'ok',
|
|
461
483
|
durationMs: Math.max(0, performance.now() - startedAt),
|
|
462
484
|
sampleRate: mcpTelemetrySampleRate(),
|
|
485
|
+
...(errorDetail ? { errorDetail } : {}),
|
|
463
486
|
...clientTelemetryFields(telemetry.getClientInfo),
|
|
464
487
|
},
|
|
465
488
|
});
|
|
@@ -483,6 +506,7 @@ function instrumentMcpServer(server, telemetry) {
|
|
|
483
506
|
}
|
|
484
507
|
if (telemetry.io && telemetry.projectDir && shouldEmitMcpTelemetry()) {
|
|
485
508
|
const errorClass = scrubErrorClass(error);
|
|
509
|
+
const errorDetail = formatErrorDetail(error);
|
|
486
510
|
await emitTelemetryEvent({
|
|
487
511
|
name: 'mcp_request_completed',
|
|
488
512
|
projectDir: telemetry.projectDir,
|
|
@@ -491,6 +515,7 @@ function instrumentMcpServer(server, telemetry) {
|
|
|
491
515
|
toolName: name,
|
|
492
516
|
outcome: 'error',
|
|
493
517
|
...(errorClass ? { errorClass } : {}),
|
|
518
|
+
...(errorDetail ? { errorDetail } : {}),
|
|
494
519
|
durationMs: Math.max(0, performance.now() - startedAt),
|
|
495
520
|
sampleRate: mcpTelemetrySampleRate(),
|
|
496
521
|
...clientTelemetryFields(telemetry.getClientInfo),
|
|
@@ -1,5 +1,8 @@
|
|
|
1
|
-
import { KtxQueryError, isNativeProgrammingFault } from '../../errors.js';
|
|
2
|
-
import {
|
|
1
|
+
import { KtxExpectedError, KtxQueryError, isNativeProgrammingFault } from '../../errors.js';
|
|
2
|
+
import { executeProjectReadOnlySql } from '../../context/connections/project-sql-executor.js';
|
|
3
|
+
import { FEDERATED_CONNECTION_ID, federatedConnectionListing } from '../../context/connections/federation.js';
|
|
4
|
+
import { resolveConfiguredConnection } from '../../context/connections/resolve-connection.js';
|
|
5
|
+
import { localConnectionInfoFromConfig, } from '../../context/connections/local-warehouse-descriptor.js';
|
|
3
6
|
import { createKtxEntityDetailsService } from '../../context/scan/entity-details.js';
|
|
4
7
|
import { createKtxDiscoverDataService } from '../../context/search/discover.js';
|
|
5
8
|
import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js';
|
|
@@ -8,75 +11,77 @@ import { createKtxDictionarySearchService } from '../../context/sl/dictionary-se
|
|
|
8
11
|
import { readLocalSlSource } from '../../context/sl/local-sl.js';
|
|
9
12
|
import { assertSafeConnectionId } from '../../context/sl/source-files.js';
|
|
10
13
|
import { readLocalKnowledgePage, searchLocalKnowledgePages } from '../wiki/local-knowledge.js';
|
|
11
|
-
async function cleanupConnector(connector) {
|
|
12
|
-
if (connector?.cleanup) {
|
|
13
|
-
await connector.cleanup();
|
|
14
|
-
}
|
|
15
|
-
}
|
|
16
14
|
/**
 * Validate a SQL statement as read-only and run it against one configured
 * connection, or against the federated connection.
 *
 * Steps, in order: require the SQL analysis service and a connector factory,
 * resolve the target connection (the federated id skips the config lookup and
 * is validated with the 'duckdb' driver's dialect), run read-only validation,
 * execute through executeProjectReadOnlySql, then shape the response.
 *
 * @param project Local project; its `config` and `projectDir` are read.
 * @param options Supplies `sqlAnalysis` and `localScan.createConnector`.
 * @param input `connectionId`, `sql` and `maxRows` for the request.
 * @param onProgress Optional progress callback, awaited at each stage.
 * @returns `headers`, optional `headerTypes`, `rows` and `rowCount`.
 * @throws Error when validation support or the connector factory is missing.
 * @throws KtxQueryError when the SQL is rejected as not read-only, or when
 *   execution fails with anything other than a native programming fault or a
 *   KtxExpectedError (both of which are rethrown unchanged).
 */
async function executeValidatedReadOnlySql(project, options, input, onProgress) {
    await onProgress?.({ progress: 0, message: 'Validating SQL' });
    const sqlAnalysis = options.sqlAnalysis;
    if (!sqlAnalysis) {
        throw new Error('sql_execution requires parser-backed SQL validation.');
    }
    const createConnector = options.localScan?.createConnector;
    if (!createConnector) {
        throw new Error('sql_execution requires a local scan connector factory.');
    }
    let connectionId;
    let connection;
    let driverForDialect;
    if (input.connectionId === FEDERATED_CONNECTION_ID) {
        // The federated id has no entry of its own in the config, so there is
        // no per-connection config to resolve.
        connectionId = input.connectionId;
        connection = undefined;
        driverForDialect = 'duckdb';
    }
    else {
        connectionId = assertSafeConnectionId(input.connectionId);
        connection = resolveConfiguredConnection(project.config, connectionId);
        driverForDialect = connection.driver;
    }
    const dialect = sqlAnalysisDialectForDriver(driverForDialect);
    const validation = await sqlAnalysis.validateReadOnly(input.sql, dialect);
    if (!validation.ok) {
        // The guard rejecting the caller's SQL is an expected outcome rather
        // than a ktx fault, hence the query-error classification.
        throw new KtxQueryError(validation.error ?? 'SQL is not read-only.');
    }
    await onProgress?.({ progress: 0.3, message: 'Executing' });
    // Native JS errors and already-expected errors pass through untouched;
    // every other failure is wrapped as a query error that keeps the
    // original message and records the original error as its cause.
    const reclassifyExecutionFailure = (error) => {
        if (isNativeProgrammingFault(error) || error instanceof KtxExpectedError) {
            throw error;
        }
        throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error });
    };
    const result = await executeProjectReadOnlySql({
        project,
        input: {
            connectionId,
            projectDir: project.projectDir,
            connection,
            sql: input.sql,
            maxRows: input.maxRows,
        },
        createConnector,
        runId: 'mcp-sql-execution',
    }).catch(reclassifyExecutionFailure);
    const response = {
        headers: result.headers,
        ...(result.headerTypes ? { headerTypes: result.headerTypes } : {}),
        rows: result.rows,
        rowCount: result.rowCount ?? result.rows.length,
    };
    await onProgress?.({ progress: 1, message: `Fetched ${response.rowCount} rows` });
    return response;
}
|
|
71
65
|
export function createLocalProjectMcpContextPorts(project, options) {
|
|
72
66
|
const embeddingService = options.embeddingService;
|
|
73
67
|
const ports = {
|
|
74
68
|
connections: {
|
|
75
69
|
async list() {
|
|
76
|
-
|
|
70
|
+
const configured = Object.entries(project.config.connections)
|
|
77
71
|
.map(([id, config]) => localConnectionInfoFromConfig(id, config))
|
|
78
72
|
.filter((connection) => connection !== null)
|
|
79
73
|
.sort((a, b) => a.id.localeCompare(b.id));
|
|
74
|
+
const federated = federatedConnectionListing(project.config.connections, project.projectDir);
|
|
75
|
+
if (federated) {
|
|
76
|
+
configured.push({
|
|
77
|
+
id: federated.id,
|
|
78
|
+
name: federated.id,
|
|
79
|
+
connectionType: 'DUCKDB',
|
|
80
|
+
members: federated.members,
|
|
81
|
+
hint: federated.hint,
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
return configured;
|
|
80
85
|
},
|
|
81
86
|
},
|
|
82
87
|
knowledge: {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import YAML from 'yaml';
|
|
2
2
|
import { buildLiveDatabaseManifestShards } from '../../context/ingest/adapters/live-database/manifest.js';
|
|
3
3
|
import { isSlYamlPath } from '../../context/sl/source-files.js';
|
|
4
|
+
import { deriveFederatedConnection } from '../connections/federation.js';
|
|
4
5
|
import { buildKtxRelationshipArtifacts, buildKtxRelationshipDiagnostics, emptyKtxRelationshipProfileArtifact, } from './relationship-diagnostics.js';
|
|
5
6
|
const LIVE_DATABASE_ADAPTER = 'live-database';
|
|
6
7
|
const LOCAL_AUTHOR = 'ktx';
|
|
@@ -113,7 +114,32 @@ function joinReferencesExistingColumns(join, columnsByTable) {
|
|
|
113
114
|
}
|
|
114
115
|
return true;
|
|
115
116
|
}
|
|
116
|
-
async function
|
|
117
|
+
/**
 * Collect the join-target table refs exposed by the other members of the
 * federated connection.
 *
 * Returns an empty set when no federated connection can be derived from the
 * project's connections. Otherwise every member except `connectionId` is
 * looked up concurrently and the refs are merged into one set.
 */
async function federatedSiblingTargets(project, connectionId) {
    const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir);
    if (!descriptor) {
        return new Set();
    }
    const lookups = [];
    for (const member of descriptor.members) {
        if (member.connectionId === connectionId) {
            continue;
        }
        lookups.push(siblingJoinTargets(project, member.connectionId));
    }
    const targets = new Set();
    for (const refs of await Promise.all(lookups)) {
        for (const ref of refs) {
            targets.add(ref);
        }
    }
    return targets;
}
|
|
126
|
+
/**
 * List the table refs of one sibling connection, each prefixed with that
 * connection's id.
 *
 * The lookup is best-effort: a failed directory listing counts as no files,
 * and a shard that cannot be read or parsed contributes no refs.
 */
async function siblingJoinTargets(project, connectionId) {
    // Resolves to the parsed YAML shard, or null when reading/parsing fails.
    const readShardOrNull = (file) => project.fileStore
        .readFile(file)
        .then(({ content }) => YAML.parse(content))
        .catch(() => null);
    const tableRefsIn = async (file) => {
        const shard = await readShardOrNull(file);
        const entries = Object.values(shard?.tables ?? {});
        // Per the upstream note, entry.table is a member-local ref (table,
        // schema.table or catalog.schema.table) that is never prefixed with
        // the connection id, so prepending the member id gives the
        // fully-qualified `to:` form used by cross-database joins.
        return entries.map((entry) => `${connectionId}.${entry.table}`);
    };
    const listing = await project.fileStore.listFiles(schemaDir(connectionId)).catch(() => ({ files: [] }));
    const shardFiles = listing.files.filter(isSlYamlPath);
    const refsPerFile = await Promise.all(shardFiles.map((file) => tableRefsIn(file)));
    return refsPerFile.flat();
}
|
|
142
|
+
async function loadExistingManifestState(project, connectionId, snapshot, siblingTargets) {
|
|
117
143
|
const descriptions = new Map();
|
|
118
144
|
const preservedJoins = new Map();
|
|
119
145
|
const usage = new Map();
|
|
@@ -146,7 +172,7 @@ async function loadExistingManifestState(project, connectionId, snapshot) {
|
|
|
146
172
|
}
|
|
147
173
|
const joins = (entry.joins ?? []).filter((join) => {
|
|
148
174
|
return ((join.source === 'manual' || join.source === 'inferred') &&
|
|
149
|
-
validTableNames.has(join.to) &&
|
|
175
|
+
(validTableNames.has(join.to) || siblingTargets.has(join.to)) &&
|
|
150
176
|
joinReferencesExistingColumns(join, columnsByTable));
|
|
151
177
|
});
|
|
152
178
|
if (joins.length > 0) {
|
|
@@ -170,7 +196,8 @@ export async function writeLocalScanManifestShards(input) {
|
|
|
170
196
|
manifestShardsWritten: 0,
|
|
171
197
|
};
|
|
172
198
|
}
|
|
173
|
-
const
|
|
199
|
+
const siblingTargets = await federatedSiblingTargets(input.project, input.connectionId);
|
|
200
|
+
const existing = await loadExistingManifestState(input.project, input.connectionId, input.snapshot, siblingTargets);
|
|
174
201
|
const { shards } = buildLiveDatabaseManifestShards({
|
|
175
202
|
connectionType: input.driver.toUpperCase(),
|
|
176
203
|
tables: snapshotTablesToManifestData(input.snapshot, input.descriptionUpdates),
|
|
@@ -178,6 +205,7 @@ export async function writeLocalScanManifestShards(input) {
|
|
|
178
205
|
existingDescriptions: existing.descriptions,
|
|
179
206
|
existingPreservedJoins: existing.preservedJoins,
|
|
180
207
|
existingUsage: existing.usage,
|
|
208
|
+
federatedSiblingTargets: siblingTargets,
|
|
181
209
|
mapColumnType: (dimensionType) => dimensionType,
|
|
182
210
|
});
|
|
183
211
|
const manifestShards = [];
|
|
@@ -1,22 +1,35 @@
|
|
|
1
|
+
import { FEDERATED_CONNECTION_ID } from '../connections/federation.js';
|
|
2
|
+
import { resolveRequiredConnectionId } from '../connections/resolve-connection.js';
|
|
1
3
|
import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js';
|
|
2
4
|
import { loadLocalSlSourceRecords } from './local-sl.js';
|
|
3
5
|
import { toResolvedWire } from './semantic-layer.service.js';
|
|
4
6
|
import { assertSafeConnectionId } from './source-files.js';
|
|
5
7
|
const COMPILE_ONLY_REASON = 'Local semantic-layer query compiled SQL but no data-source execution adapter is configured.';
|
|
8
|
+
const FEDERATED_SL_QUERY_UNSUPPORTED = `Semantic-layer queries are per-connection and cannot target the federated connection '${FEDERATED_CONNECTION_ID}'. ` +
|
|
9
|
+
`Run a cross-database query as read-only SQL instead — ktx sql -c ${FEDERATED_CONNECTION_ID} "SELECT ..." or the sql_execution tool — ` +
|
|
10
|
+
'using catalog-qualified table names (connectionId.schema.table, or connectionId.table for sqlite; ' +
|
|
11
|
+
'double-quote ids that are not bare identifiers, e.g. "books-db".public.books).';
|
|
6
12
|
/**
 * Resolve the connection id for a local semantic-layer query: pick or
 * validate the id against the project config, then pass it through
 * assertSafeConnectionId and return that result.
 */
function resolveLocalConnectionId(project, requested) {
    const resolvedId = resolveRequiredConnectionId(project.config, requested);
    return assertSafeConnectionId(resolvedId);
}
|
|
15
|
+
// Per the upstream note, the planner rejects a source set containing a join
// whose `to` names a source outside that set, which would break every query
// on the connection. Only joins that resolve within the connection's own
// sources are kept; cross-database joins (a `to` qualified by a sibling
// connection id) fall out the same way and are meant to run as raw SQL.
// The test is membership in the known names, not a connection-id prefix
// match, so a local target whose name happens to collide with a sibling
// connection id survives.
/**
 * Return `source` with its joins limited to targets in `knownSourceNames`.
 * The same object is returned when nothing has to be dropped; otherwise a
 * shallow copy with the filtered `joins` array is returned.
 */
function withResolvableJoinsOnly(source, knownSourceNames) {
    const declaredJoins = source.joins;
    if (declaredJoins.length === 0) {
        return source;
    }
    const resolvable = [];
    for (const join of declaredJoins) {
        if (knownSourceNames.has(join.to)) {
            resolvable.push(join);
        }
    }
    if (resolvable.length === declaredJoins.length) {
        return source;
    }
    return { ...source, joins: resolvable };
}
|
|
16
29
|
/**
 * Builds the list of sources handed to the query planner for one connection.
 *
 * Records are loaded through loadLocalSlSourceRecords; only those whose source
 * has a truthy `table` or `sql` are kept. Each surviving source has its joins
 * restricted to the surviving source names (withResolvableJoinsOnly) and is
 * then converted with toResolvedWire.
 */
async function loadComputableSources(project, connectionId) {
    const loaded = await loadLocalSlSourceRecords(project, { connectionId });
    const computable = loaded.filter(({ source }) => source.table || source.sql);
    // Names of the sources that made it through the filter; joins may only target these.
    const names = new Set();
    for (const { source } of computable) {
        names.add(source.name);
    }
    return computable.map(({ source }) => toResolvedWire(withResolvableJoinsOnly(source, names)));
}
|
|
21
34
|
function headersFromColumns(columns) {
|
|
22
35
|
return columns
|
|
@@ -24,9 +37,13 @@ function headersFromColumns(columns) {
|
|
|
24
37
|
.filter((name) => typeof name === 'string' && name.length > 0);
|
|
25
38
|
}
|
|
26
39
|
export async function compileLocalSlQuery(project, options) {
|
|
40
|
+
if (options.connectionId === FEDERATED_CONNECTION_ID) {
|
|
41
|
+
throw new Error(FEDERATED_SL_QUERY_UNSUPPORTED);
|
|
42
|
+
}
|
|
27
43
|
await options.onProgress?.({ progress: 0, message: 'Compiling query' });
|
|
28
44
|
const connectionId = resolveLocalConnectionId(project, options.connectionId);
|
|
29
|
-
const
|
|
45
|
+
const driver = project.config.connections[connectionId]?.driver;
|
|
46
|
+
const dialect = sqlAnalysisDialectForDriver(driver);
|
|
30
47
|
const sources = await loadComputableSources(project, connectionId);
|
|
31
48
|
await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' });
|
|
32
49
|
const response = await options.compute.query({
|
|
@@ -76,7 +93,7 @@ export async function compileLocalSlQuery(project, options) {
|
|
|
76
93
|
...response.plan,
|
|
77
94
|
execution: {
|
|
78
95
|
mode: 'executed',
|
|
79
|
-
driver:
|
|
96
|
+
driver: driver ?? 'unknown',
|
|
80
97
|
maxRows,
|
|
81
98
|
rowCount: execution.rowCount,
|
|
82
99
|
},
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { join } from 'node:path';
|
|
2
2
|
import YAML from 'yaml';
|
|
3
3
|
import { z } from 'zod';
|
|
4
|
+
import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from '../connections/federation.js';
|
|
4
5
|
import { HybridSearchCore } from '../../context/search/hybrid-search-core.js';
|
|
5
6
|
import { DEFAULT_PRIORITY, resolveDescription } from './descriptions.js';
|
|
6
7
|
import { normalizeSemanticLayerDescriptions } from './description-normalization.js';
|
|
@@ -86,7 +87,32 @@ function parsedStandaloneSource(parsed, name) {
|
|
|
86
87
|
});
|
|
87
88
|
}
|
|
88
89
|
/**
 * Loads semantic-layer source records for the connection named by
 * `input.connectionId`.
 *
 * Any id other than FEDERATED_CONNECTION_ID is forwarded to
 * loadSingleConnectionSourceRecords. For the federated id, the descriptor is
 * derived from the project's connections and project directory; every member's
 * records are loaded concurrently and returned as one flat list in member
 * order. When no descriptor can be derived the result is an empty list.
 */
export async function loadLocalSlSourceRecords(project, input) {
    const requestedId = input.connectionId;
    if (requestedId !== FEDERATED_CONNECTION_ID) {
        return loadSingleConnectionSourceRecords(project, requestedId);
    }
    const federation = deriveFederatedConnection(project.config.connections, project.projectDir);
    if (!federation) {
        return [];
    }
    // The federated view acts as a single virtual connection: each row is
    // stamped with the federated id and renamed `<member>.<name>`, so that a
    // listing/search row round-trips to `ktx sl -c _ktx_federated read <name>`.
    // The member a row came from is recoverable from that prefixed name.
    const asFederatedRecord = (memberId, record) => {
        const qualifiedName = `${memberId}.${record.name}`;
        return {
            ...record,
            connectionId: FEDERATED_CONNECTION_ID,
            name: qualifiedName,
            source: { ...record.source, name: qualifiedName },
        };
    };
    const loadMember = async (member) => {
        const memberRecords = await loadSingleConnectionSourceRecords(project, member.connectionId);
        return memberRecords.map((record) => asFederatedRecord(member.connectionId, record));
    };
    const batches = await Promise.all(federation.members.map((member) => loadMember(member)));
    return batches.flat();
}
|
|
114
|
+
async function loadSingleConnectionSourceRecords(project, rawConnectionId) {
|
|
115
|
+
const connectionId = assertSafeConnectionId(rawConnectionId);
|
|
90
116
|
const dir = `semantic-layer/${connectionId}`;
|
|
91
117
|
const schemaDir = `${dir}/_schema`;
|
|
92
118
|
const listed = await project.fileStore.listFiles(dir);
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import type { KtxFileStorePort } from '../../context/core/file-store.js';
|
|
2
|
+
/**
 * Reports whether a connection id carries the reserved `_ktx_` prefix.
 * `assertSafeConnectionId` rejects ids for which this returns true.
 *
 * @internal
 */
|
|
3
|
+
export declare function isReservedConnectionId(connectionId: string): boolean;
|
|
2
4
|
export declare function assertSafeConnectionId(connectionId: string): string;
|
|
3
5
|
export declare function isSafeConnectionId(connectionId: string | undefined): connectionId is string;
|
|
4
6
|
export declare function sourceNameFromPath(path: string): string;
|
|
@@ -17,7 +17,14 @@ function assertSafePathToken(kind, value) {
|
|
|
17
17
|
}
|
|
18
18
|
return value;
|
|
19
19
|
}
|
|
20
|
+
/**
 * Reports whether `connectionId` lies in the reserved `_ktx_` namespace,
 * i.e. whether it is a string starting with `_ktx_`.
 *
 * Anything that is not a string (for example a missing id) is reported as not
 * reserved instead of throwing. assertSafeConnectionId calls this before
 * isSafeConnectionId, so a missing id keeps reaching that second check and
 * fails with its "Unsafe connection id" error rather than a TypeError here.
 *
 * @param connectionId Candidate connection id.
 * @returns `true` only for strings that begin with `_ktx_`.
 * @internal
 */
export function isReservedConnectionId(connectionId) {
    return typeof connectionId === 'string' && connectionId.startsWith('_ktx_');
}
|
|
20
24
|
export function assertSafeConnectionId(connectionId) {
|
|
25
|
+
if (isReservedConnectionId(connectionId)) {
|
|
26
|
+
throw new Error(`Connection id "${connectionId}" uses the reserved "_ktx_" prefix.`);
|
|
27
|
+
}
|
|
21
28
|
if (!isSafeConnectionId(connectionId)) {
|
|
22
29
|
throw new Error(`Unsafe connection id: ${connectionId}`);
|
|
23
30
|
}
|