@oscharko-dev/keiko 0.1.0-beta.0 → 0.1.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -570
- package/dist/cli/gen-tests.js +8 -3
- package/dist/cli/index.js +0 -0
- package/dist/cli/init.d.ts +8 -0
- package/dist/cli/init.js +122 -0
- package/dist/cli/investigate.js +6 -2
- package/dist/cli/lifecycle.d.ts +18 -0
- package/dist/cli/lifecycle.js +289 -0
- package/dist/cli/models.js +2 -2
- package/dist/cli/runner.js +21 -28
- package/dist/gateway/capabilities.d.ts +1 -0
- package/dist/gateway/capabilities.data.js +5 -203
- package/dist/gateway/capabilities.js +18 -0
- package/dist/gateway/config.d.ts +2 -1
- package/dist/gateway/config.js +98 -9
- package/dist/gateway/gateway.js +3 -3
- package/dist/gateway/index.d.ts +2 -2
- package/dist/gateway/index.js +2 -2
- package/dist/gateway/model-selection.d.ts +3 -1
- package/dist/gateway/model-selection.js +15 -4
- package/dist/gateway/types.d.ts +1 -0
- package/dist/harness/session.d.ts +1 -1
- package/dist/harness/session.js +1 -1
- package/dist/sdk/index.d.ts +1 -1
- package/dist/sdk/index.js +1 -1
- package/dist/tools/patch-normalize.js +1 -2
- package/dist/tools/terminal-policy.js +1 -8
- package/dist/ui/chat-handlers.js +26 -12
- package/dist/ui/csp-hashes.json +6 -6
- package/dist/ui/deps.d.ts +14 -0
- package/dist/ui/deps.js +92 -20
- package/dist/ui/gateway-setup.d.ts +3 -0
- package/dist/ui/gateway-setup.js +235 -0
- package/dist/ui/read-handlers.js +14 -7
- package/dist/ui/routes.js +6 -4
- package/dist/ui/run-handlers.js +3 -2
- package/dist/ui/server.d.ts +1 -1
- package/dist/ui/server.js +1 -1
- package/dist/ui/static/404.html +1 -1
- package/dist/ui/static/_next/static/chunks/44-17c259c8e72fb82f.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/_not-found/{page-75825b09bcecad97.js → page-7bd871301b874ae0.js} +1 -1
- package/dist/ui/static/_next/static/chunks/app/launch/{page-9c86a13c29884245.js → page-3bd098d60d6df513.js} +1 -1
- package/dist/ui/static/_next/static/chunks/app/layout-091bb8be985f5c03.js +1 -0
- package/dist/ui/static/_next/static/chunks/app/{page-4168c12c68b7a853.js → page-2006f21df58c2bb9.js} +1 -1
- package/dist/ui/static/_next/static/chunks/{main-app-30679af7240d63e9.js → main-app-e8144a306630b76d.js} +1 -1
- package/dist/ui/static/_next/static/css/{be7cb54d5c5673b6.css → 3d68155c8db012f4.css} +1 -1
- package/dist/ui/static/index.html +1 -1
- package/dist/ui/static/index.txt +3 -3
- package/dist/ui/static/launch.html +1 -1
- package/dist/ui/static/launch.txt +3 -3
- package/dist/ui/store-handlers.js +16 -12
- package/dist/workflows/bug-investigation/model-loop.js +1 -4
- package/dist/workflows/bug-investigation/parse.js +5 -3
- package/dist/workflows/unit-tests/model-loop.js +1 -1
- package/dist/workspace/retrieval.js +1 -1
- package/package.json +4 -3
- package/dist/ui/static/_next/static/chunks/4-be1fef693af8e088.js +0 -1
- package/dist/ui/static/_next/static/chunks/app/layout-bdea63fe87947d50.js +0 -1
- /package/dist/ui/static/_next/static/{ca-A01hy9W98aRvMZKdAw → f456ZUOjzfLnTnTyaLylj}/_buildManifest.js +0 -0
- /package/dist/ui/static/_next/static/{ca-A01hy9W98aRvMZKdAw → f456ZUOjzfLnTnTyaLylj}/_ssgManifest.js +0 -0
|
@@ -1,203 +1,5 @@
|
|
|
1
|
-
// Raw capability registry data
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
export const CAPABILITY_DATA = [
|
|
6
|
-
{
|
|
7
|
-
id: "Qwen3-Coder-480B-A35B-Instruct-FP8",
|
|
8
|
-
kind: "chat",
|
|
9
|
-
contextWindow: 128_000, // [assumption]
|
|
10
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
11
|
-
toolCalling: true,
|
|
12
|
-
structuredOutput: true,
|
|
13
|
-
streaming: true,
|
|
14
|
-
costClass: "high",
|
|
15
|
-
latencyClass: "slow",
|
|
16
|
-
throughputHint: "~40 tok/s [assumption]",
|
|
17
|
-
preferredUseCases: ["Large-codebase refactor", "Cross-file analysis"],
|
|
18
|
-
knownLimitations: ["Very high VRAM; slow for interactive use"],
|
|
19
|
-
},
|
|
20
|
-
{
|
|
21
|
-
id: "Qwen/Qwen3-Coder-Next-FP8",
|
|
22
|
-
kind: "chat",
|
|
23
|
-
contextWindow: 128_000, // [assumption]
|
|
24
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
25
|
-
toolCalling: true,
|
|
26
|
-
structuredOutput: true,
|
|
27
|
-
streaming: true,
|
|
28
|
-
costClass: "high",
|
|
29
|
-
latencyClass: "slow",
|
|
30
|
-
throughputHint: "~40 tok/s [assumption]",
|
|
31
|
-
preferredUseCases: ["Deep code synthesis requiring maximum reasoning depth"],
|
|
32
|
-
knownLimitations: [
|
|
33
|
-
"Same VRAM/latency constraints as Qwen3-Coder-480B; treat as next-generation upgrade path",
|
|
34
|
-
],
|
|
35
|
-
},
|
|
36
|
-
{
|
|
37
|
-
id: "Devstral-2-123B-Instruct-2512",
|
|
38
|
-
kind: "chat",
|
|
39
|
-
contextWindow: 128_000, // [assumption]
|
|
40
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
41
|
-
toolCalling: true,
|
|
42
|
-
structuredOutput: true,
|
|
43
|
-
streaming: true,
|
|
44
|
-
costClass: "high",
|
|
45
|
-
latencyClass: "standard",
|
|
46
|
-
throughputHint: "~80 tok/s [assumption]",
|
|
47
|
-
preferredUseCases: ["Agentic code completion", "Multi-step software engineering"],
|
|
48
|
-
knownLimitations: [
|
|
49
|
-
"123B scale; requires dedicated GPU allocation; not suitable for high-QPS workloads",
|
|
50
|
-
],
|
|
51
|
-
},
|
|
52
|
-
{
|
|
53
|
-
id: "gpt-oss-120b",
|
|
54
|
-
kind: "chat",
|
|
55
|
-
contextWindow: 128_000, // [assumption]
|
|
56
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
57
|
-
toolCalling: true,
|
|
58
|
-
structuredOutput: true,
|
|
59
|
-
streaming: true,
|
|
60
|
-
costClass: "high",
|
|
61
|
-
latencyClass: "standard",
|
|
62
|
-
throughputHint: "~80 tok/s [assumption]",
|
|
63
|
-
preferredUseCases: ["General-purpose coding", "Code review", "Explanation"],
|
|
64
|
-
knownLimitations: [
|
|
65
|
-
"Customer-hosted OSS model; endpoint reliability depends on customer infrastructure",
|
|
66
|
-
],
|
|
67
|
-
},
|
|
68
|
-
{
|
|
69
|
-
id: "mistral-large-3",
|
|
70
|
-
kind: "chat",
|
|
71
|
-
contextWindow: 128_000, // Azure deployment Mistral-Large-3, Swedish Central.
|
|
72
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
73
|
-
toolCalling: true,
|
|
74
|
-
structuredOutput: true, // [assumption]
|
|
75
|
-
streaming: true,
|
|
76
|
-
costClass: "high",
|
|
77
|
-
latencyClass: "standard",
|
|
78
|
-
throughputHint: "20 RPM / 20k TPM on current Visual Studio subscription quota",
|
|
79
|
-
preferredUseCases: ["Alternative coding agent", "Large-context explanation", "Review"],
|
|
80
|
-
knownLimitations: [
|
|
81
|
-
"Current subscription quota caps this deployment at 20 capacity units without quota increase",
|
|
82
|
-
],
|
|
83
|
-
},
|
|
84
|
-
{
|
|
85
|
-
id: "llama-4-maverick-vision",
|
|
86
|
-
kind: "chat",
|
|
87
|
-
contextWindow: 128_000, // Azure deployment Llama-4-Maverick-17B-128E-Instruct-FP8.
|
|
88
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
89
|
-
toolCalling: true,
|
|
90
|
-
structuredOutput: false, // [assumption]
|
|
91
|
-
streaming: true,
|
|
92
|
-
costClass: "high",
|
|
93
|
-
latencyClass: "standard",
|
|
94
|
-
throughputHint: "20 RPM / 20k TPM on current Visual Studio subscription quota",
|
|
95
|
-
preferredUseCases: ["Alternative agent model", "Vision-capable review path", "Explanation"],
|
|
96
|
-
knownLimitations: [
|
|
97
|
-
"Current subscription quota caps this deployment at 20 capacity units without quota increase",
|
|
98
|
-
"Structured output reliability must be verified before routing patch-producing workflows",
|
|
99
|
-
],
|
|
100
|
-
},
|
|
101
|
-
{
|
|
102
|
-
id: "Mistral-Small-3.1-24B-Instruct-2503",
|
|
103
|
-
kind: "chat",
|
|
104
|
-
contextWindow: 128_000,
|
|
105
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
106
|
-
toolCalling: true,
|
|
107
|
-
structuredOutput: true,
|
|
108
|
-
streaming: true,
|
|
109
|
-
costClass: "medium",
|
|
110
|
-
latencyClass: "fast",
|
|
111
|
-
throughputHint: "~150 tok/s [assumption]",
|
|
112
|
-
preferredUseCases: ["Interactive code assist", "Quick edits", "Low-latency agent steps"],
|
|
113
|
-
knownLimitations: ["Smaller model; may require multi-turn for complex reasoning"],
|
|
114
|
-
},
|
|
115
|
-
{
|
|
116
|
-
id: "Qwen2.5-Coder-7B-Instruct",
|
|
117
|
-
kind: "chat",
|
|
118
|
-
contextWindow: 128_000,
|
|
119
|
-
maxOutputTokens: 4_096, // [assumption]
|
|
120
|
-
toolCalling: true,
|
|
121
|
-
structuredOutput: false, // [assumption]
|
|
122
|
-
streaming: true,
|
|
123
|
-
costClass: "low",
|
|
124
|
-
latencyClass: "fast",
|
|
125
|
-
throughputHint: "~200 tok/s [assumption]",
|
|
126
|
-
preferredUseCases: [
|
|
127
|
-
"Inline completion",
|
|
128
|
-
"Snippet generation",
|
|
129
|
-
"High-throughput batch coding tasks",
|
|
130
|
-
],
|
|
131
|
-
knownLimitations: [
|
|
132
|
-
"Limited structured-output reliability; context degradation beyond 64K tokens observed in benchmarks [assumption]",
|
|
133
|
-
],
|
|
134
|
-
},
|
|
135
|
-
{
|
|
136
|
-
id: "gemma-4-31b-it",
|
|
137
|
-
kind: "chat",
|
|
138
|
-
contextWindow: 128_000, // [assumption]
|
|
139
|
-
maxOutputTokens: 8_192, // [assumption]
|
|
140
|
-
toolCalling: true,
|
|
141
|
-
structuredOutput: true,
|
|
142
|
-
streaming: true,
|
|
143
|
-
costClass: "medium",
|
|
144
|
-
latencyClass: "standard",
|
|
145
|
-
throughputHint: "~120 tok/s [assumption]",
|
|
146
|
-
preferredUseCases: ["Document summarisation", "Code explanation", "Regulated-context Q&A"],
|
|
147
|
-
knownLimitations: [
|
|
148
|
-
"Instruction-tuned variant; verify function-calling reliability against customer endpoint",
|
|
149
|
-
],
|
|
150
|
-
},
|
|
151
|
-
{
|
|
152
|
-
id: "dotsocr",
|
|
153
|
-
kind: "ocr-vision",
|
|
154
|
-
contextWindow: 0,
|
|
155
|
-
maxOutputTokens: 0,
|
|
156
|
-
toolCalling: false,
|
|
157
|
-
structuredOutput: false,
|
|
158
|
-
streaming: false,
|
|
159
|
-
costClass: "medium",
|
|
160
|
-
latencyClass: "standard",
|
|
161
|
-
throughputHint: "n/a",
|
|
162
|
-
preferredUseCases: [
|
|
163
|
-
"Document OCR",
|
|
164
|
-
"Scanned contract/form extraction",
|
|
165
|
-
"Image-to-text in regulated workflows",
|
|
166
|
-
],
|
|
167
|
-
knownLimitations: [
|
|
168
|
-
"Not a chat model; chat-completions adapter does not apply; callOcr method is Wave 2",
|
|
169
|
-
],
|
|
170
|
-
},
|
|
171
|
-
{
|
|
172
|
-
id: "multilingual-e5-large Embedding",
|
|
173
|
-
kind: "embedding",
|
|
174
|
-
contextWindow: 512, // [assumption]
|
|
175
|
-
maxOutputTokens: 0,
|
|
176
|
-
toolCalling: false,
|
|
177
|
-
structuredOutput: false,
|
|
178
|
-
streaming: false,
|
|
179
|
-
costClass: "low",
|
|
180
|
-
latencyClass: "fast",
|
|
181
|
-
throughputHint: "n/a",
|
|
182
|
-
preferredUseCases: [
|
|
183
|
-
"Semantic search",
|
|
184
|
-
"RAG retrieval",
|
|
185
|
-
"Similarity ranking across multilingual content",
|
|
186
|
-
],
|
|
187
|
-
knownLimitations: ["Max 512 tokens per input; callEmbedding method is Wave 2"],
|
|
188
|
-
},
|
|
189
|
-
{
|
|
190
|
-
id: "text-embedding-3-large",
|
|
191
|
-
kind: "embedding",
|
|
192
|
-
contextWindow: 8_191, // Azure OpenAI embedding deployment.
|
|
193
|
-
maxOutputTokens: 0,
|
|
194
|
-
toolCalling: false,
|
|
195
|
-
structuredOutput: false,
|
|
196
|
-
streaming: false,
|
|
197
|
-
costClass: "low",
|
|
198
|
-
latencyClass: "fast",
|
|
199
|
-
throughputHint: "120 requests / 10s and 120k TPM on current deployment",
|
|
200
|
-
preferredUseCases: ["Semantic search", "RAG retrieval", "Similarity ranking"],
|
|
201
|
-
knownLimitations: ["Embedding-only model; chat-completions adapter does not apply"],
|
|
202
|
-
},
|
|
203
|
-
];
|
|
1
|
+
// Raw built-in capability registry data.
|
|
2
|
+
//
|
|
3
|
+
// Keiko intentionally ships no customer or deployment-specific model ids. Private model
|
|
4
|
+
// capabilities are supplied by local config or discovered at runtime during UI onboarding.
|
|
5
|
+
export const CAPABILITY_DATA = [];
|
|
@@ -10,6 +10,24 @@ export function findCapability(modelId) {
|
|
|
10
10
|
export function listCapabilities() {
|
|
11
11
|
return CAPABILITY_REGISTRY;
|
|
12
12
|
}
|
|
13
|
+
export function createDefaultChatCapability(modelId) {
|
|
14
|
+
return {
|
|
15
|
+
id: modelId,
|
|
16
|
+
kind: "chat",
|
|
17
|
+
contextWindow: 0,
|
|
18
|
+
maxOutputTokens: 0,
|
|
19
|
+
toolCalling: true,
|
|
20
|
+
structuredOutput: true,
|
|
21
|
+
streaming: true,
|
|
22
|
+
costClass: "medium",
|
|
23
|
+
latencyClass: "standard",
|
|
24
|
+
throughputHint: "runtime-configured endpoint",
|
|
25
|
+
preferredUseCases: ["Chat", "Agent workflow"],
|
|
26
|
+
knownLimitations: [
|
|
27
|
+
"Runtime-configured capability; validate against the target endpoint before production use",
|
|
28
|
+
],
|
|
29
|
+
};
|
|
30
|
+
}
|
|
13
31
|
function matches(cap, query) {
|
|
14
32
|
if (query.kind !== undefined && cap.kind !== query.kind) {
|
|
15
33
|
return false;
|
package/dist/gateway/config.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { CircuitBreakerConfig, GatewayConfig } from "./types.js";
|
|
1
|
+
import type { CircuitBreakerConfig, GatewayConfig, ModelCapability } from "./types.js";
|
|
2
2
|
export type EnvSource = Readonly<Record<string, string | undefined>>;
|
|
3
3
|
export interface SafeProviderConfig {
|
|
4
4
|
readonly modelId: string;
|
|
@@ -9,6 +9,7 @@ export interface SafeProviderConfig {
|
|
|
9
9
|
export interface SafeGatewayConfig {
|
|
10
10
|
readonly providers: readonly SafeProviderConfig[];
|
|
11
11
|
readonly circuitBreaker: CircuitBreakerConfig;
|
|
12
|
+
readonly capabilities?: readonly ModelCapability[] | undefined;
|
|
12
13
|
}
|
|
13
14
|
export declare function parseGatewayConfig(raw: unknown, env?: EnvSource): GatewayConfig;
|
|
14
15
|
export declare function loadConfigFromFile(path: string, env?: EnvSource): GatewayConfig;
|
package/dist/gateway/config.js
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
// API keys are sourced only from environment or the config file, never CLI flags,
|
|
4
4
|
// and are excluded from every serialisation path.
|
|
5
5
|
import { readFileSync } from "node:fs";
|
|
6
|
-
import { findCapability } from "./capabilities.js";
|
|
7
6
|
import { ConfigInvalidError } from "./errors.js";
|
|
8
7
|
const DEFAULT_TIMEOUT_MS = 30_000;
|
|
9
8
|
const DEFAULT_MAX_RETRIES = 3;
|
|
@@ -26,6 +25,45 @@ function requireNonEmptyString(value, path) {
|
|
|
26
25
|
}
|
|
27
26
|
return value;
|
|
28
27
|
}
|
|
28
|
+
function optionalStringArray(value, path, fallback) {
|
|
29
|
+
if (value === undefined) {
|
|
30
|
+
return fallback;
|
|
31
|
+
}
|
|
32
|
+
if (!Array.isArray(value) || value.some((item) => typeof item !== "string")) {
|
|
33
|
+
throw new ConfigInvalidError(`${path} must be an array of strings`);
|
|
34
|
+
}
|
|
35
|
+
return value;
|
|
36
|
+
}
|
|
37
|
+
function optionalNonNegativeInt(value, path, fallback) {
|
|
38
|
+
if (value === undefined) {
|
|
39
|
+
return fallback;
|
|
40
|
+
}
|
|
41
|
+
if (typeof value !== "number" || !Number.isInteger(value) || value < 0) {
|
|
42
|
+
throw new ConfigInvalidError(`${path} must be a non-negative integer`);
|
|
43
|
+
}
|
|
44
|
+
return value;
|
|
45
|
+
}
|
|
46
|
+
function optionalBoolean(value, path, fallback) {
|
|
47
|
+
if (value === undefined) {
|
|
48
|
+
return fallback;
|
|
49
|
+
}
|
|
50
|
+
if (typeof value !== "boolean") {
|
|
51
|
+
throw new ConfigInvalidError(`${path} must be a boolean`);
|
|
52
|
+
}
|
|
53
|
+
return value;
|
|
54
|
+
}
|
|
55
|
+
function optionalNonEmptyString(value, path, fallback) {
|
|
56
|
+
if (value === undefined) {
|
|
57
|
+
return fallback;
|
|
58
|
+
}
|
|
59
|
+
return requireNonEmptyString(value, path);
|
|
60
|
+
}
|
|
61
|
+
function requireEnum(value, path, allowed) {
|
|
62
|
+
if (typeof value !== "string" || !allowed.includes(value)) {
|
|
63
|
+
throw new ConfigInvalidError(`${path} must be one of ${allowed.join(", ")}`);
|
|
64
|
+
}
|
|
65
|
+
return value;
|
|
66
|
+
}
|
|
29
67
|
// Model id → KEIKO_MODEL_<UPPER>_ form: non-alphanumerics become "_", uppercased.
|
|
30
68
|
function envModelToken(modelId) {
|
|
31
69
|
return modelId.replace(/[^A-Za-z0-9]/g, "_").toUpperCase();
|
|
@@ -42,7 +80,7 @@ function resolveSecret(modelId, fileValue, env, suffix) {
|
|
|
42
80
|
return fallback ?? "";
|
|
43
81
|
}
|
|
44
82
|
// Validates a resolved baseUrl for scheme and credential hygiene. Host/IP is
|
|
45
|
-
// intentionally NOT restricted: Keiko addresses
|
|
83
|
+
// intentionally NOT restricted: Keiko addresses private network endpoints
|
|
46
84
|
// (private IPs are a valid, first-class target); this guard is scheme/credential
|
|
47
85
|
// hygiene + defence-in-depth, not host filtering.
|
|
48
86
|
function isLoopbackHost(hostname) {
|
|
@@ -69,15 +107,41 @@ function validateBaseUrl(baseUrl, path) {
|
|
|
69
107
|
throw new ConfigInvalidError(`${path}.baseUrl must not embed credentials in the URL; provide the key via apiKey`);
|
|
70
108
|
}
|
|
71
109
|
}
|
|
72
|
-
function parseProvider(raw, index, env) {
|
|
73
|
-
|
|
110
|
+
function parseProviderCapability(raw, path, modelId) {
|
|
111
|
+
if (raw === undefined) {
|
|
112
|
+
return undefined;
|
|
113
|
+
}
|
|
74
114
|
if (!isRecord(raw)) {
|
|
75
115
|
throw new ConfigInvalidError(`${path} must be an object`);
|
|
76
116
|
}
|
|
77
|
-
const
|
|
78
|
-
if (
|
|
79
|
-
throw new ConfigInvalidError(`${path}.
|
|
117
|
+
const id = optionalNonEmptyString(raw.id, `${path}.id`, modelId);
|
|
118
|
+
if (id !== modelId) {
|
|
119
|
+
throw new ConfigInvalidError(`${path}.id must match the provider modelId`);
|
|
80
120
|
}
|
|
121
|
+
return {
|
|
122
|
+
id,
|
|
123
|
+
kind: requireEnum(raw.kind, `${path}.kind`, ["chat", "embedding", "ocr-vision"]),
|
|
124
|
+
contextWindow: optionalNonNegativeInt(raw.contextWindow, `${path}.contextWindow`, 0),
|
|
125
|
+
maxOutputTokens: optionalNonNegativeInt(raw.maxOutputTokens, `${path}.maxOutputTokens`, 0),
|
|
126
|
+
toolCalling: optionalBoolean(raw.toolCalling, `${path}.toolCalling`, false),
|
|
127
|
+
structuredOutput: optionalBoolean(raw.structuredOutput, `${path}.structuredOutput`, false),
|
|
128
|
+
streaming: optionalBoolean(raw.streaming, `${path}.streaming`, false),
|
|
129
|
+
costClass: requireEnum(raw.costClass ?? "medium", `${path}.costClass`, [
|
|
130
|
+
"low",
|
|
131
|
+
"medium",
|
|
132
|
+
"high",
|
|
133
|
+
]),
|
|
134
|
+
latencyClass: requireEnum(raw.latencyClass ?? "standard", `${path}.latencyClass`, ["fast", "standard", "slow"]),
|
|
135
|
+
throughputHint: optionalNonEmptyString(raw.throughputHint, `${path}.throughputHint`, "runtime-configured"),
|
|
136
|
+
preferredUseCases: optionalStringArray(raw.preferredUseCases, `${path}.preferredUseCases`, [
|
|
137
|
+
"Runtime-configured model",
|
|
138
|
+
]),
|
|
139
|
+
knownLimitations: optionalStringArray(raw.knownLimitations, `${path}.knownLimitations`, [
|
|
140
|
+
"Capabilities are runtime-declared and should be verified in the target environment",
|
|
141
|
+
]),
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
function resolveProviderConnection(raw, path, modelId, env) {
|
|
81
145
|
const fileBaseUrl = typeof raw.baseUrl === "string" ? raw.baseUrl : "";
|
|
82
146
|
const fileApiKey = typeof raw.apiKey === "string" ? raw.apiKey : "";
|
|
83
147
|
const baseUrl = resolveSecret(modelId, fileBaseUrl, env, "BASE_URL");
|
|
@@ -89,6 +153,10 @@ function parseProvider(raw, index, env) {
|
|
|
89
153
|
throw new ConfigInvalidError(`${path}.apiKey must be set via config or environment`);
|
|
90
154
|
}
|
|
91
155
|
validateBaseUrl(baseUrl, path);
|
|
156
|
+
return { baseUrl, apiKey };
|
|
157
|
+
}
|
|
158
|
+
function parseProviderConfig(raw, path, modelId, env) {
|
|
159
|
+
const { baseUrl, apiKey } = resolveProviderConnection(raw, path, modelId, env);
|
|
92
160
|
return {
|
|
93
161
|
modelId,
|
|
94
162
|
baseUrl,
|
|
@@ -98,6 +166,18 @@ function parseProvider(raw, index, env) {
|
|
|
98
166
|
retryBaseDelayMs: requirePositiveInt(raw.retryBaseDelayMs ?? DEFAULT_RETRY_BASE_DELAY_MS, `${path}.retryBaseDelayMs`),
|
|
99
167
|
};
|
|
100
168
|
}
|
|
169
|
+
function parseProvider(raw, index, env) {
|
|
170
|
+
const path = `providers[${String(index)}]`;
|
|
171
|
+
if (!isRecord(raw)) {
|
|
172
|
+
throw new ConfigInvalidError(`${path} must be an object`);
|
|
173
|
+
}
|
|
174
|
+
const modelId = requireNonEmptyString(raw.modelId, `${path}.modelId`);
|
|
175
|
+
const capability = parseProviderCapability(raw.capability, `${path}.capability`, modelId);
|
|
176
|
+
return {
|
|
177
|
+
provider: parseProviderConfig(raw, path, modelId, env),
|
|
178
|
+
...(capability === undefined ? {} : { capability }),
|
|
179
|
+
};
|
|
180
|
+
}
|
|
101
181
|
function requireNonNegativeInt(value, path) {
|
|
102
182
|
if (typeof value !== "number" || !Number.isInteger(value) || value < 0) {
|
|
103
183
|
throw new ConfigInvalidError(`${path} must be a non-negative integer`);
|
|
@@ -120,8 +200,16 @@ export function parseGatewayConfig(raw, env = {}) {
|
|
|
120
200
|
if (!Array.isArray(providersRaw) || providersRaw.length === 0) {
|
|
121
201
|
throw new ConfigInvalidError("providers must be a non-empty array");
|
|
122
202
|
}
|
|
123
|
-
const
|
|
124
|
-
|
|
203
|
+
const parsed = providersRaw.map((item, index) => parseProvider(item, index, env));
|
|
204
|
+
const providers = parsed.map((item) => item.provider);
|
|
205
|
+
const capabilities = parsed
|
|
206
|
+
.map((item) => item.capability)
|
|
207
|
+
.filter((item) => item !== undefined);
|
|
208
|
+
return {
|
|
209
|
+
providers,
|
|
210
|
+
circuitBreaker: parseCircuitBreaker(raw.circuitBreaker),
|
|
211
|
+
...(capabilities.length === 0 ? {} : { capabilities }),
|
|
212
|
+
};
|
|
125
213
|
}
|
|
126
214
|
export function loadConfigFromFile(path, env = {}) {
|
|
127
215
|
let text;
|
|
@@ -150,5 +238,6 @@ export function toSafeObject(config) {
|
|
|
150
238
|
retryBaseDelayMs: provider.retryBaseDelayMs,
|
|
151
239
|
})),
|
|
152
240
|
circuitBreaker: config.circuitBreaker,
|
|
241
|
+
...(config.capabilities === undefined ? {} : { capabilities: config.capabilities }),
|
|
153
242
|
};
|
|
154
243
|
}
|
package/dist/gateway/gateway.js
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
// (request id, latency, cost class) is owned by the gateway, not the provider, so
|
|
4
4
|
// the audit ledger (issue #10) has a reliable typed target on every response.
|
|
5
5
|
import { randomUUID } from "node:crypto";
|
|
6
|
-
import { findCapability } from "./capabilities.js";
|
|
7
6
|
import { UnknownModelError } from "./errors.js";
|
|
7
|
+
import { findConfiguredCapability } from "./model-selection.js";
|
|
8
8
|
import { OpenAiAdapter } from "./openai-adapter.js";
|
|
9
9
|
import { CircuitBreaker, executeWithRetry, systemClock } from "./resilience.js";
|
|
10
10
|
export class Gateway {
|
|
@@ -65,9 +65,9 @@ export class Gateway {
|
|
|
65
65
|
if (provider === undefined) {
|
|
66
66
|
throw new UnknownModelError(`no provider configured for model '${modelId}'`);
|
|
67
67
|
}
|
|
68
|
-
const capability = findCapability(modelId);
|
|
68
|
+
const capability = findConfiguredCapability(this.config, modelId);
|
|
69
69
|
if (capability === undefined) {
|
|
70
|
-
throw new UnknownModelError(`model '${modelId}'
|
|
70
|
+
throw new UnknownModelError(`model '${modelId}' has no capability metadata`);
|
|
71
71
|
}
|
|
72
72
|
if (capability.kind !== "chat") {
|
|
73
73
|
throw new UnknownModelError(`model '${modelId}' has kind '${capability.kind}'; the chat path requires a chat model`);
|
package/dist/gateway/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
export type { CircuitBreakerConfig, CircuitBreakerStatus, CircuitState, ChatMessage, Clock, CostClass, FinishReason, GatewayConfig, GatewayRequest, LatencyClass, ModelCapability, ModelKind, ModelProviderConfig, NormalizedResponse, NormalizedToolCall, ProviderAdapter, ResponseFormat, StreamDelta, StreamEvent, ToolDefinition, UsageMetadata, } from "./types.js";
|
|
2
|
-
export { CAPABILITY_REGISTRY, findCapability, listCapabilities, selectCheapest, type CapabilityQuery, } from "./capabilities.js";
|
|
2
|
+
export { CAPABILITY_REGISTRY, createDefaultChatCapability, findCapability, listCapabilities, selectCheapest, type CapabilityQuery, } from "./capabilities.js";
|
|
3
3
|
export { loadConfigFromFile, parseGatewayConfig, toSafeObject, type EnvSource, type SafeGatewayConfig, type SafeProviderConfig, } from "./config.js";
|
|
4
4
|
export { Gateway, type GatewayDeps } from "./gateway.js";
|
|
5
5
|
export { OpenAiAdapter, type AdapterDeps } from "./openai-adapter.js";
|
|
6
|
-
export { assertConfiguredModel, selectConfiguredModel, type ModelSelectionQuery, } from "./model-selection.js";
|
|
6
|
+
export { assertConfiguredModel, findConfiguredCapability, listConfiguredCapabilities, selectConfiguredModel, type ModelSelectionQuery, } from "./model-selection.js";
|
|
7
7
|
export { CircuitBreaker, executeWithRetry, systemClock, type RetryConfig } from "./resilience.js";
|
|
8
8
|
export { normalizeChatResponse, type UsageSeed } from "./normalize.js";
|
|
9
9
|
export { redact } from "./redaction.js";
|
package/dist/gateway/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
// Public barrel for the model gateway: all types, the Gateway orchestrator, the
|
|
2
2
|
// capability registry helpers, config loaders, and the typed error taxonomy.
|
|
3
|
-
export { CAPABILITY_REGISTRY, findCapability, listCapabilities, selectCheapest, } from "./capabilities.js";
|
|
3
|
+
export { CAPABILITY_REGISTRY, createDefaultChatCapability, findCapability, listCapabilities, selectCheapest, } from "./capabilities.js";
|
|
4
4
|
export { loadConfigFromFile, parseGatewayConfig, toSafeObject, } from "./config.js";
|
|
5
5
|
export { Gateway } from "./gateway.js";
|
|
6
6
|
export { OpenAiAdapter } from "./openai-adapter.js";
|
|
7
|
-
export { assertConfiguredModel, selectConfiguredModel, } from "./model-selection.js";
|
|
7
|
+
export { assertConfiguredModel, findConfiguredCapability, listConfiguredCapabilities, selectConfiguredModel, } from "./model-selection.js";
|
|
8
8
|
export { CircuitBreaker, executeWithRetry, systemClock } from "./resilience.js";
|
|
9
9
|
export { normalizeChatResponse } from "./normalize.js";
|
|
10
10
|
export { redact } from "./redaction.js";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { GatewayConfig, ModelKind } from "./types.js";
|
|
1
|
+
import type { GatewayConfig, ModelCapability, ModelKind } from "./types.js";
|
|
2
2
|
export interface ModelSelectionQuery {
|
|
3
3
|
readonly kind: ModelKind;
|
|
4
4
|
readonly toolCalling?: boolean | undefined;
|
|
@@ -6,4 +6,6 @@ export interface ModelSelectionQuery {
|
|
|
6
6
|
readonly minContextWindow?: number | undefined;
|
|
7
7
|
}
|
|
8
8
|
export declare function assertConfiguredModel(config: GatewayConfig, modelId: string): void;
|
|
9
|
+
export declare function findConfiguredCapability(config: GatewayConfig, modelId: string): ModelCapability | undefined;
|
|
10
|
+
export declare function listConfiguredCapabilities(config: GatewayConfig): readonly ModelCapability[];
|
|
9
11
|
export declare function selectConfiguredModel(config: GatewayConfig, query: ModelSelectionQuery): string | undefined;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { listCapabilities } from "./capabilities.js";
|
|
1
|
+
import { createDefaultChatCapability, listCapabilities } from "./capabilities.js";
|
|
2
2
|
import { ConfigInvalidError } from "./errors.js";
|
|
3
3
|
const COST_RANK = { low: 0, medium: 1, high: 2 };
|
|
4
4
|
function matches(capability, query) {
|
|
@@ -21,11 +21,22 @@ export function assertConfiguredModel(config, modelId) {
|
|
|
21
21
|
throw new ConfigInvalidError(`model '${modelId}' is not configured as a provider`);
|
|
22
22
|
}
|
|
23
23
|
}
|
|
24
|
+
export function findConfiguredCapability(config, modelId) {
|
|
25
|
+
return (config.capabilities?.find((capability) => capability.id === modelId) ??
|
|
26
|
+
listCapabilities().find((capability) => capability.id === modelId) ??
|
|
27
|
+
(config.providers.some((provider) => provider.modelId === modelId)
|
|
28
|
+
? createDefaultChatCapability(modelId)
|
|
29
|
+
: undefined));
|
|
30
|
+
}
|
|
31
|
+
export function listConfiguredCapabilities(config) {
|
|
32
|
+
return config.providers
|
|
33
|
+
.map((provider) => findConfiguredCapability(config, provider.modelId))
|
|
34
|
+
.filter((capability) => capability !== undefined);
|
|
35
|
+
}
|
|
24
36
|
export function selectConfiguredModel(config, query) {
|
|
25
|
-
const configured = new Set(config.providers.map((provider) => provider.modelId));
|
|
26
37
|
let best;
|
|
27
|
-
for (const capability of listCapabilities()) {
|
|
28
|
-
if (!
|
|
38
|
+
for (const capability of listConfiguredCapabilities(config)) {
|
|
39
|
+
if (!matches(capability, query)) {
|
|
29
40
|
continue;
|
|
30
41
|
}
|
|
31
42
|
if (best === undefined || COST_RANK[capability.costClass] < COST_RANK[best.costClass]) {
|
package/dist/gateway/types.d.ts
CHANGED
|
@@ -31,6 +31,7 @@ export interface CircuitBreakerConfig {
|
|
|
31
31
|
export interface GatewayConfig {
|
|
32
32
|
readonly providers: readonly ModelProviderConfig[];
|
|
33
33
|
readonly circuitBreaker: CircuitBreakerConfig;
|
|
34
|
+
readonly capabilities?: readonly ModelCapability[] | undefined;
|
|
34
35
|
}
|
|
35
36
|
export interface ChatMessage {
|
|
36
37
|
readonly role: "system" | "user" | "assistant" | "tool";
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { Clock } from "../gateway/types.js";
|
|
2
2
|
import type { EventSink, Fingerprinter, IdSource, ModelPort, ToolPort } from "./ports.js";
|
|
3
3
|
import { type HarnessLimits, type RunResult, type TaskInput } from "./types.js";
|
|
4
|
-
export declare const HARNESS_VERSION = "0.1.0-beta.0";
|
|
4
|
+
export declare const HARNESS_VERSION = "0.1.0-beta.3";
|
|
5
5
|
export interface AgentConfig {
|
|
6
6
|
readonly model: string;
|
|
7
7
|
readonly workingDirectory: string;
|
package/dist/harness/session.js
CHANGED
|
@@ -10,7 +10,7 @@ import { runLoop } from "./loop.js";
|
|
|
10
10
|
import { MemoryEventSink } from "./sinks.js";
|
|
11
11
|
import { resolveTaskPlan } from "./tasks/policy.js";
|
|
12
12
|
import { DEFAULT_LIMITS, } from "./types.js";
|
|
13
|
-
export const HARNESS_VERSION = "0.1.0-beta.0";
|
|
13
|
+
export const HARNESS_VERSION = "0.1.0-beta.3";
|
|
14
14
|
function resolveLimits(config) {
|
|
15
15
|
return { ...DEFAULT_LIMITS, ...config.limits };
|
|
16
16
|
}
|
package/dist/sdk/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export declare const SDK_VERSION = "0.1.0-beta.0";
|
|
1
|
+
export declare const SDK_VERSION = "0.1.0-beta.3";
|
|
2
2
|
export { createSession, type AgentConfig, type AgentSession, type HarnessDeps, type RunResult, type TaskInput, type TaskType, } from "../harness/index.js";
|
|
3
3
|
export { runAgent, type SdkAgentConfig, type SdkEvidenceOptions } from "./run-agent.js";
|
|
4
4
|
export { buildWorkspaceSummary, detectWorkspace, summarizeForAudit, type AuditEntry, type AuditSummary, type ContextEntrySummary, type ContextPackSummary, type WorkspaceInfo, type WorkspaceSummary, } from "../workspace/index.js";
|
package/dist/sdk/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// Single-sourced package version; CLI and SDK both read this to avoid drift.
|
|
2
|
-
export const SDK_VERSION = "0.1.0-beta.0";
|
|
2
|
+
export const SDK_VERSION = "0.1.0-beta.3";
|
|
3
3
|
// The typed agent surface. AgentConfig, the session factory, the run result, and the
|
|
4
4
|
// session handle all live in the harness module (ADR-0004); the SDK re-exports them so
|
|
5
5
|
// callers import the agent API from one place.
|
|
@@ -8,8 +8,7 @@ function isBodyLine(line) {
|
|
|
8
8
|
return marker === " " || marker === "+" || marker === "-";
|
|
9
9
|
}
|
|
10
10
|
function isFileHeaderPair(lines, index) {
|
|
11
|
-
return (lines[index]?.startsWith("--- ") === true &&
|
|
12
|
-
lines[index + 1]?.startsWith("+++ ") === true);
|
|
11
|
+
return lines[index]?.startsWith("--- ") === true && lines[index + 1]?.startsWith("+++ ") === true;
|
|
13
12
|
}
|
|
14
13
|
function hunkEnd(lines, start) {
|
|
15
14
|
let index = start;
|
|
@@ -42,14 +42,7 @@ export const TERMINAL_COMMAND_RULES = Object.freeze([
|
|
|
42
42
|
{
|
|
43
43
|
executable: "npm",
|
|
44
44
|
allowedSubcommands: Object.freeze(["ls", "list", "help"]),
|
|
45
|
-
denyFlags: Object.freeze([
|
|
46
|
-
"-c",
|
|
47
|
-
"--call",
|
|
48
|
-
"--prefix",
|
|
49
|
-
"--global",
|
|
50
|
-
"-g",
|
|
51
|
-
"--location",
|
|
52
|
-
]),
|
|
45
|
+
denyFlags: Object.freeze(["-c", "--call", "--prefix", "--global", "-g", "--location"]),
|
|
53
46
|
},
|
|
54
47
|
{
|
|
55
48
|
executable: "git",
|