@warmdrift/kgauto-compiler 2.0.0-alpha.6 → 2.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +59 -3
- package/dist/index.d.ts +59 -3
- package/dist/index.js +77 -33
- package/dist/index.mjs +77 -33
- package/dist/{profiles-CQnLkQ7b.d.ts → profiles-B3eNQ2py.d.ts} +8 -0
- package/dist/{profiles-zm6diETo.d.mts → profiles-Py8c7zjJ.d.mts} +8 -0
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND there are more recent turns to keep, compress
|
|
25
|
+
* even when count threshold is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
270
326
|
*/
|
|
271
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
272
328
|
|
|
273
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND there are more recent turns to keep, compress
|
|
25
|
+
* even when count threshold is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
270
326
|
*/
|
|
271
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
272
328
|
|
|
273
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.js
CHANGED
|
@@ -236,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
236
236
|
]
|
|
237
237
|
};
|
|
238
238
|
}
|
|
239
|
+
function totalHistoryTokens(history) {
|
|
240
|
+
let total = 0;
|
|
241
|
+
for (const m of history) {
|
|
242
|
+
if (typeof m.content === "string") total += countTokens(m.content);
|
|
243
|
+
}
|
|
244
|
+
return total;
|
|
245
|
+
}
|
|
239
246
|
function passCompressHistory(ir, opts = {}) {
|
|
240
247
|
const history = ir.history;
|
|
241
|
-
if (!history || history.length === 0)
|
|
248
|
+
if (!history || history.length === 0) {
|
|
249
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
250
|
+
}
|
|
242
251
|
const keepRecent = opts.keepRecent ?? 4;
|
|
243
252
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
244
|
-
|
|
253
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
254
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
255
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
256
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
257
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
258
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
259
|
+
}
|
|
245
260
|
const cutIndex = history.length - keepRecent;
|
|
246
261
|
const old = history.slice(0, cutIndex);
|
|
247
262
|
const recent = history.slice(cutIndex);
|
|
248
263
|
const userTurns = old.filter((m) => m.role === "user");
|
|
249
264
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
265
|
+
const oldTokens = totalHistoryTokens(old);
|
|
266
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
250
267
|
const summary = {
|
|
251
268
|
role: "system",
|
|
252
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
269
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
253
270
|
};
|
|
254
271
|
return {
|
|
255
272
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -258,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
258
275
|
id: `compress-history-${old.length}`,
|
|
259
276
|
source: "static_pass",
|
|
260
277
|
passName: "compress_history",
|
|
261
|
-
description: `Compressed ${old.length} old turns into 1 summary
|
|
278
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
262
279
|
}
|
|
263
|
-
]
|
|
280
|
+
],
|
|
281
|
+
historyTokensTotal
|
|
264
282
|
};
|
|
265
283
|
}
|
|
266
284
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -506,7 +524,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
506
524
|
system: systemBlocks,
|
|
507
525
|
messages,
|
|
508
526
|
tools,
|
|
509
|
-
|
|
527
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
528
|
+
// floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
529
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
530
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
531
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
510
532
|
},
|
|
511
533
|
diagnostics: {
|
|
512
534
|
cacheableTokens,
|
|
@@ -1280,7 +1302,8 @@ function compile(ir, opts = {}) {
|
|
|
1280
1302
|
threshold: opts.toolRelevanceThreshold
|
|
1281
1303
|
});
|
|
1282
1304
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
1283
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
1305
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
1306
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
1284
1307
|
});
|
|
1285
1308
|
let workingIR = compressed.value;
|
|
1286
1309
|
const accumulatedMutations = [
|
|
@@ -1324,7 +1347,8 @@ function compile(ir, opts = {}) {
|
|
|
1324
1347
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1325
1348
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1326
1349
|
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1327
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1350
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
1351
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
1328
1352
|
};
|
|
1329
1353
|
const advisories = runAdvisor(
|
|
1330
1354
|
ir,
|
|
@@ -1431,7 +1455,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1431
1455
|
estimatedTokensIn: tokens,
|
|
1432
1456
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1433
1457
|
startedAt: Date.now(),
|
|
1434
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1458
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
1459
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
1435
1460
|
});
|
|
1436
1461
|
}
|
|
1437
1462
|
async function record(input) {
|
|
@@ -1505,7 +1530,8 @@ function buildPayload(input, reg) {
|
|
|
1505
1530
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1506
1531
|
cost_usd_actual: costUsdActual,
|
|
1507
1532
|
ttft_ms: input.ttftMs,
|
|
1508
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1533
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1534
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
1509
1535
|
};
|
|
1510
1536
|
}
|
|
1511
1537
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1784,31 +1810,31 @@ async function call(ir, opts = {}) {
|
|
|
1784
1810
|
fetchImpl: opts.fetchImpl,
|
|
1785
1811
|
providerOverrides: opts.providerOverrides
|
|
1786
1812
|
});
|
|
1787
|
-
|
|
1813
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
1814
|
+
if (validated.ok) {
|
|
1788
1815
|
attempts.push({ model: targetModel, status: "success" });
|
|
1789
1816
|
const latencyMs2 = Date.now() - start;
|
|
1790
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1791
1817
|
await record({
|
|
1792
1818
|
handle: initial.handle,
|
|
1793
|
-
tokensIn:
|
|
1794
|
-
tokensOut:
|
|
1819
|
+
tokensIn: validated.response.tokens.input,
|
|
1820
|
+
tokensOut: validated.response.tokens.output,
|
|
1795
1821
|
latencyMs: latencyMs2,
|
|
1796
1822
|
success: true,
|
|
1797
|
-
emptyResponse:
|
|
1798
|
-
toolsCalled:
|
|
1823
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
1824
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1799
1825
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1800
1826
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1801
1827
|
promptPreview: extractPromptPreview(ir),
|
|
1802
|
-
responsePreview:
|
|
1803
|
-
cacheReadInputTokens:
|
|
1804
|
-
cacheCreationInputTokens:
|
|
1828
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
1829
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
1830
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1805
1831
|
});
|
|
1806
1832
|
return {
|
|
1807
1833
|
handle: initial.handle,
|
|
1808
1834
|
actualModel: targetModel,
|
|
1809
1835
|
requestedModel: initial.target,
|
|
1810
1836
|
provider: activeCompile.provider,
|
|
1811
|
-
response:
|
|
1837
|
+
response: validated.response,
|
|
1812
1838
|
latencyMs: latencyMs2,
|
|
1813
1839
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1814
1840
|
attempts
|
|
@@ -1816,12 +1842,12 @@ async function call(ir, opts = {}) {
|
|
|
1816
1842
|
}
|
|
1817
1843
|
attempts.push({
|
|
1818
1844
|
model: targetModel,
|
|
1819
|
-
status:
|
|
1820
|
-
errorCode:
|
|
1821
|
-
message:
|
|
1845
|
+
status: validated.errorType,
|
|
1846
|
+
errorCode: validated.errorCode,
|
|
1847
|
+
message: validated.message
|
|
1822
1848
|
});
|
|
1823
|
-
lastErr =
|
|
1824
|
-
if (
|
|
1849
|
+
lastErr = validated;
|
|
1850
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1825
1851
|
break;
|
|
1826
1852
|
}
|
|
1827
1853
|
}
|
|
@@ -1858,17 +1884,35 @@ function extractPromptPreview(ir) {
|
|
|
1858
1884
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1859
1885
|
return void 0;
|
|
1860
1886
|
}
|
|
1861
|
-
function
|
|
1862
|
-
if (!ir.constraints?.structuredOutput)
|
|
1863
|
-
|
|
1887
|
+
function validateStructuredContract(exec, ir) {
|
|
1888
|
+
if (!ir.constraints?.structuredOutput) {
|
|
1889
|
+
return { ok: true, response: exec.response };
|
|
1890
|
+
}
|
|
1891
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
1892
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
1893
|
+
return {
|
|
1894
|
+
ok: false,
|
|
1895
|
+
status: exec.status,
|
|
1896
|
+
errorType: "retryable",
|
|
1897
|
+
errorCode: "max_tokens_on_structured_output",
|
|
1898
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
1899
|
+
raw: exec.response.raw
|
|
1900
|
+
};
|
|
1901
|
+
}
|
|
1902
|
+
if (!exec.response.text) {
|
|
1903
|
+
return { ok: true, response: exec.response };
|
|
1904
|
+
}
|
|
1864
1905
|
try {
|
|
1865
|
-
const parsed = JSON.parse(response.text);
|
|
1866
|
-
return { ...response, structuredOutput: parsed };
|
|
1906
|
+
const parsed = JSON.parse(exec.response.text);
|
|
1907
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1867
1908
|
} catch (err) {
|
|
1868
1909
|
return {
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1910
|
+
ok: false,
|
|
1911
|
+
status: exec.status,
|
|
1912
|
+
errorType: "retryable",
|
|
1913
|
+
errorCode: "structured_output_parse_failed",
|
|
1914
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1915
|
+
raw: exec.response.raw
|
|
1872
1916
|
};
|
|
1873
1917
|
}
|
|
1874
1918
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
120
120
|
]
|
|
121
121
|
};
|
|
122
122
|
}
|
|
123
|
+
function totalHistoryTokens(history) {
|
|
124
|
+
let total = 0;
|
|
125
|
+
for (const m of history) {
|
|
126
|
+
if (typeof m.content === "string") total += countTokens(m.content);
|
|
127
|
+
}
|
|
128
|
+
return total;
|
|
129
|
+
}
|
|
123
130
|
function passCompressHistory(ir, opts = {}) {
|
|
124
131
|
const history = ir.history;
|
|
125
|
-
if (!history || history.length === 0)
|
|
132
|
+
if (!history || history.length === 0) {
|
|
133
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
134
|
+
}
|
|
126
135
|
const keepRecent = opts.keepRecent ?? 4;
|
|
127
136
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
128
|
-
|
|
137
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
138
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
139
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
140
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
141
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
142
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
143
|
+
}
|
|
129
144
|
const cutIndex = history.length - keepRecent;
|
|
130
145
|
const old = history.slice(0, cutIndex);
|
|
131
146
|
const recent = history.slice(cutIndex);
|
|
132
147
|
const userTurns = old.filter((m) => m.role === "user");
|
|
133
148
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
149
|
+
const oldTokens = totalHistoryTokens(old);
|
|
150
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
134
151
|
const summary = {
|
|
135
152
|
role: "system",
|
|
136
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
153
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
137
154
|
};
|
|
138
155
|
return {
|
|
139
156
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
142
159
|
id: `compress-history-${old.length}`,
|
|
143
160
|
source: "static_pass",
|
|
144
161
|
passName: "compress_history",
|
|
145
|
-
description: `Compressed ${old.length} old turns into 1 summary
|
|
162
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
146
163
|
}
|
|
147
|
-
]
|
|
164
|
+
],
|
|
165
|
+
historyTokensTotal
|
|
148
166
|
};
|
|
149
167
|
}
|
|
150
168
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -390,7 +408,11 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
390
408
|
system: systemBlocks,
|
|
391
409
|
messages,
|
|
392
410
|
tools,
|
|
393
|
-
|
|
411
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
412
|
+
// floor surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
413
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
414
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
415
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens
|
|
394
416
|
},
|
|
395
417
|
diagnostics: {
|
|
396
418
|
cacheableTokens,
|
|
@@ -762,7 +784,8 @@ function compile(ir, opts = {}) {
|
|
|
762
784
|
threshold: opts.toolRelevanceThreshold
|
|
763
785
|
});
|
|
764
786
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
765
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
787
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
788
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
766
789
|
});
|
|
767
790
|
let workingIR = compressed.value;
|
|
768
791
|
const accumulatedMutations = [
|
|
@@ -806,7 +829,8 @@ function compile(ir, opts = {}) {
|
|
|
806
829
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
807
830
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
808
831
|
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
809
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
832
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
833
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
810
834
|
};
|
|
811
835
|
const advisories = runAdvisor(
|
|
812
836
|
ir,
|
|
@@ -913,7 +937,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
913
937
|
estimatedTokensIn: tokens,
|
|
914
938
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
915
939
|
startedAt: Date.now(),
|
|
916
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
940
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
941
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
917
942
|
});
|
|
918
943
|
}
|
|
919
944
|
async function record(input) {
|
|
@@ -987,7 +1012,8 @@ function buildPayload(input, reg) {
|
|
|
987
1012
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
988
1013
|
cost_usd_actual: costUsdActual,
|
|
989
1014
|
ttft_ms: input.ttftMs,
|
|
990
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1015
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1016
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
991
1017
|
};
|
|
992
1018
|
}
|
|
993
1019
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
|
@@ -1266,31 +1292,31 @@ async function call(ir, opts = {}) {
|
|
|
1266
1292
|
fetchImpl: opts.fetchImpl,
|
|
1267
1293
|
providerOverrides: opts.providerOverrides
|
|
1268
1294
|
});
|
|
1269
|
-
|
|
1295
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
1296
|
+
if (validated.ok) {
|
|
1270
1297
|
attempts.push({ model: targetModel, status: "success" });
|
|
1271
1298
|
const latencyMs2 = Date.now() - start;
|
|
1272
|
-
const responseWithStructured = withStructuredOutput(exec.response, ir);
|
|
1273
1299
|
await record({
|
|
1274
1300
|
handle: initial.handle,
|
|
1275
|
-
tokensIn:
|
|
1276
|
-
tokensOut:
|
|
1301
|
+
tokensIn: validated.response.tokens.input,
|
|
1302
|
+
tokensOut: validated.response.tokens.output,
|
|
1277
1303
|
latencyMs: latencyMs2,
|
|
1278
1304
|
success: true,
|
|
1279
|
-
emptyResponse:
|
|
1280
|
-
toolsCalled:
|
|
1305
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
1306
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1281
1307
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1282
1308
|
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
1283
1309
|
promptPreview: extractPromptPreview(ir),
|
|
1284
|
-
responsePreview:
|
|
1285
|
-
cacheReadInputTokens:
|
|
1286
|
-
cacheCreationInputTokens:
|
|
1310
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
1311
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
1312
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated
|
|
1287
1313
|
});
|
|
1288
1314
|
return {
|
|
1289
1315
|
handle: initial.handle,
|
|
1290
1316
|
actualModel: targetModel,
|
|
1291
1317
|
requestedModel: initial.target,
|
|
1292
1318
|
provider: activeCompile.provider,
|
|
1293
|
-
response:
|
|
1319
|
+
response: validated.response,
|
|
1294
1320
|
latencyMs: latencyMs2,
|
|
1295
1321
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1296
1322
|
attempts
|
|
@@ -1298,12 +1324,12 @@ async function call(ir, opts = {}) {
|
|
|
1298
1324
|
}
|
|
1299
1325
|
attempts.push({
|
|
1300
1326
|
model: targetModel,
|
|
1301
|
-
status:
|
|
1302
|
-
errorCode:
|
|
1303
|
-
message:
|
|
1327
|
+
status: validated.errorType,
|
|
1328
|
+
errorCode: validated.errorCode,
|
|
1329
|
+
message: validated.message
|
|
1304
1330
|
});
|
|
1305
|
-
lastErr =
|
|
1306
|
-
if (
|
|
1331
|
+
lastErr = validated;
|
|
1332
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
1307
1333
|
break;
|
|
1308
1334
|
}
|
|
1309
1335
|
}
|
|
@@ -1340,17 +1366,35 @@ function extractPromptPreview(ir) {
|
|
|
1340
1366
|
if (lastHist) return lastHist.slice(0, 200);
|
|
1341
1367
|
return void 0;
|
|
1342
1368
|
}
|
|
1343
|
-
function withStructuredOutput(response, ir) {
|
|
1344
|
-
if (!ir.constraints?.structuredOutput)
|
|
1345
|
-
|
|
1369
|
+
function validateStructuredContract(exec, ir) {
|
|
1370
|
+
if (!ir.constraints?.structuredOutput) {
|
|
1371
|
+
return { ok: true, response: exec.response };
|
|
1372
|
+
}
|
|
1373
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
1374
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
1375
|
+
return {
|
|
1376
|
+
ok: false,
|
|
1377
|
+
status: exec.status,
|
|
1378
|
+
errorType: "retryable",
|
|
1379
|
+
errorCode: "max_tokens_on_structured_output",
|
|
1380
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
1381
|
+
raw: exec.response.raw
|
|
1382
|
+
};
|
|
1383
|
+
}
|
|
1384
|
+
if (!exec.response.text) {
|
|
1385
|
+
return { ok: true, response: exec.response };
|
|
1386
|
+
}
|
|
1346
1387
|
try {
|
|
1347
|
-
const parsed = JSON.parse(response.text);
|
|
1348
|
-
return { ...response, structuredOutput: parsed };
|
|
1388
|
+
const parsed = JSON.parse(exec.response.text);
|
|
1389
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1349
1390
|
} catch (err) {
|
|
1350
1391
|
return {
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1392
|
+
ok: false,
|
|
1393
|
+
status: exec.status,
|
|
1394
|
+
errorType: "retryable",
|
|
1395
|
+
errorCode: "structured_output_parse_failed",
|
|
1396
|
+
message: err instanceof Error ? err.message : String(err),
|
|
1397
|
+
raw: exec.response.raw
|
|
1354
1398
|
};
|
|
1355
1399
|
}
|
|
1356
1400
|
}
|
|
@@ -330,6 +330,14 @@ interface CompileResult {
|
|
|
330
330
|
* from history caching. alpha.5.
|
|
331
331
|
*/
|
|
332
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
333
341
|
};
|
|
334
342
|
}
|
|
335
343
|
/**
|
|
@@ -330,6 +330,14 @@ interface CompileResult {
|
|
|
330
330
|
* from history caching. alpha.5.
|
|
331
331
|
*/
|
|
332
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
333
341
|
};
|
|
334
342
|
}
|
|
335
343
|
/**
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.6",
|
|
3
|
+
"version": "2.0.0-alpha.8",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|