@warmdrift/kgauto-compiler 2.0.0-alpha.6 → 2.0.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +59 -3
- package/dist/index.d.ts +59 -3
- package/dist/index.js +31 -9
- package/dist/index.mjs +31 -9
- package/dist/{profiles-CQnLkQ7b.d.ts → profiles-B3eNQ2py.d.ts} +8 -0
- package/dist/{profiles-zm6diETo.d.mts → profiles-Py8c7zjJ.d.mts} +8 -0
- package/dist/profiles.d.mts +1 -1
- package/dist/profiles.d.ts +1 -1
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-zm6diETo.mjs';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-Py8c7zjJ.mjs';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.mjs';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND there are more recent turns to keep, compress
|
|
25
|
+
* even when count threshold is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
270
326
|
*/
|
|
271
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
272
328
|
|
|
273
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-CQnLkQ7b.js';
|
|
2
|
-
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
1
|
+
import { M as ModelProfile, C as CompilePolicy, N as NormalizedResponse, A as ApiKeys, P as ProviderOverrides, a as CompiledRequest, b as PromptIR, c as CallOptions, d as CallResult, R as RecordInput, O as OracleScore, e as CompileResult, B as BestPracticeAdvisory } from './profiles-B3eNQ2py.js';
|
|
2
|
+
export { f as ALIASES, g as CacheStrategy, h as CallAttempt, i as CallError, j as CliffRule, k as Constraints, H as HistoryCachePolicy, I as IntentDeclaration, L as LoweringSpec, l as Message, m as MutationApplied, n as NormalizedTokens, o as PromptSection, p as Provider, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, T as ToolCall, s as ToolDefinition, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
3
3
|
export { ALL_ARCHETYPES, ContextBucket, DIALECT_VERSION, HistoryDepth, INTENT_ARCHETYPES, IntentArchetypeName, OutputMode, ShapeSignature, ToolCountBucket, bucketContext, bucketHistory, bucketToolCount, hashShape, isArchetype, learningKey } from './dialect.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -19,6 +19,15 @@ interface CompileOptions {
|
|
|
19
19
|
toolRelevanceThreshold?: number;
|
|
20
20
|
/** History compression — turns count threshold (default 8). */
|
|
21
21
|
compressHistoryAfter?: number;
|
|
22
|
+
/**
|
|
23
|
+
* History compression — token threshold (alpha.7). When total history
|
|
24
|
+
* tokens exceed this AND there are more recent turns to keep, compress
|
|
25
|
+
* even when count threshold is below `compressHistoryAfter`. Catches
|
|
26
|
+
* fat-message bloat (tool-using agents pack many tool-call/result pairs
|
|
27
|
+
* into single assistant messages — count stays low, tokens explode).
|
|
28
|
+
* Default undefined (disabled — backward-compatible).
|
|
29
|
+
*/
|
|
30
|
+
compressHistoryAboveTokens?: number;
|
|
22
31
|
/**
|
|
23
32
|
* Consumer-declared policy. Filters blocked models, enforces cost
|
|
24
33
|
* ceiling, boosts preferred. See CompilePolicy in ir.ts.
|
|
@@ -114,6 +123,53 @@ declare function clearBrain(): void;
|
|
|
114
123
|
* network error is swallowed/forwarded to onError.
|
|
115
124
|
*/
|
|
116
125
|
declare function record(input: RecordInput): Promise<void>;
|
|
126
|
+
/**
|
|
127
|
+
* Wire shape POSTed by `record()` to the brain proxy's `/outcomes` endpoint.
|
|
128
|
+
*
|
|
129
|
+
* Exported so consumer proxies can `import { OutcomePayload } from
|
|
130
|
+
* '@warmdrift/kgauto-compiler'` instead of redefining the shape — that way
|
|
131
|
+
* TypeScript catches future schema additions (cache fields, advisory
|
|
132
|
+
* telemetry, etc.) at consumer build time, not silently at runtime.
|
|
133
|
+
*
|
|
134
|
+
* **Forward-compat rule:** consumer proxies should pass the body through to
|
|
135
|
+
* Supabase rather than reconstructing field-by-field. The recommended shape
|
|
136
|
+
* is `const row = { ...body }` (or `await supabase.from('compile_outcomes')
|
|
137
|
+
* .insert(body)` directly). Filtering proxies break schema evolution
|
|
138
|
+
* silently — see s17 root-cause investigation 2026-05-10.
|
|
139
|
+
*/
|
|
140
|
+
interface OutcomePayload {
|
|
141
|
+
handle: string;
|
|
142
|
+
app_id?: string;
|
|
143
|
+
intent_archetype?: string;
|
|
144
|
+
/** The model that ACTUALLY RAN (post-fallback). */
|
|
145
|
+
model?: string;
|
|
146
|
+
/** The model v2 compile() originally targeted. NULL when no fallback. */
|
|
147
|
+
requested_model?: string;
|
|
148
|
+
provider?: string;
|
|
149
|
+
shape_key?: string;
|
|
150
|
+
learning_key?: string;
|
|
151
|
+
mutations_applied: string[];
|
|
152
|
+
tokens_in: number;
|
|
153
|
+
tokens_out: number;
|
|
154
|
+
estimated_tokens_in?: number;
|
|
155
|
+
latency_ms: number;
|
|
156
|
+
success: boolean;
|
|
157
|
+
empty_response: boolean;
|
|
158
|
+
error_type?: string;
|
|
159
|
+
tools_called?: string[];
|
|
160
|
+
oracle_score?: number;
|
|
161
|
+
oracle_dimensions?: Record<string, number>;
|
|
162
|
+
oracle_rationale?: string;
|
|
163
|
+
prompt_preview?: string;
|
|
164
|
+
response_preview?: string;
|
|
165
|
+
dialect_version: string;
|
|
166
|
+
cache_read_input_tokens?: number;
|
|
167
|
+
cache_creation_input_tokens?: number;
|
|
168
|
+
cost_usd_actual?: number;
|
|
169
|
+
ttft_ms?: number;
|
|
170
|
+
history_cacheable_tokens?: number;
|
|
171
|
+
history_tokens_at_compile?: number;
|
|
172
|
+
}
|
|
117
173
|
|
|
118
174
|
/**
|
|
119
175
|
* Oracle contract — how an app tells the brain whether a response was good.
|
|
@@ -270,4 +326,4 @@ declare function runAdvisor(ir: PromptIR, result: AdvisorContext, profile: Model
|
|
|
270
326
|
*/
|
|
271
327
|
declare function compile(ir: PromptIR, opts?: CompileOptions): CompileResult;
|
|
272
328
|
|
|
273
|
-
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
|
329
|
+
export { ApiKeys, type AppOracle, BestPracticeAdvisory, type BrainConfig, CallOptions, CallResult, type CompileOptions, CompilePolicy, CompileResult, CompiledRequest, type ExecuteErr, type ExecuteOk, type ExecuteOptions, type ExecuteResult, type LLMJudgeOptions, ModelProfile, NormalizedResponse, type OracleContext, OracleScore, type OutcomePayload, PromptIR, ProviderOverrides, RecordInput, buildLLMJudge, call, clearBrain, compile, configureBrain, countTokens, execute, record, resetTokenizer, runAdvisor, setTokenizer };
|
package/dist/index.js
CHANGED
|
@@ -236,20 +236,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
236
236
|
]
|
|
237
237
|
};
|
|
238
238
|
}
|
|
239
|
+
function totalHistoryTokens(history) {
|
|
240
|
+
let total = 0;
|
|
241
|
+
for (const m of history) {
|
|
242
|
+
if (typeof m.content === "string") total += countTokens(m.content);
|
|
243
|
+
}
|
|
244
|
+
return total;
|
|
245
|
+
}
|
|
239
246
|
function passCompressHistory(ir, opts = {}) {
|
|
240
247
|
const history = ir.history;
|
|
241
|
-
if (!history || history.length === 0)
|
|
248
|
+
if (!history || history.length === 0) {
|
|
249
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
250
|
+
}
|
|
242
251
|
const keepRecent = opts.keepRecent ?? 4;
|
|
243
252
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
244
|
-
|
|
253
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
254
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
255
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
256
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
257
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
258
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
259
|
+
}
|
|
245
260
|
const cutIndex = history.length - keepRecent;
|
|
246
261
|
const old = history.slice(0, cutIndex);
|
|
247
262
|
const recent = history.slice(cutIndex);
|
|
248
263
|
const userTurns = old.filter((m) => m.role === "user");
|
|
249
264
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
265
|
+
const oldTokens = totalHistoryTokens(old);
|
|
266
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
250
267
|
const summary = {
|
|
251
268
|
role: "system",
|
|
252
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
269
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
253
270
|
};
|
|
254
271
|
return {
|
|
255
272
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -258,9 +275,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
258
275
|
id: `compress-history-${old.length}`,
|
|
259
276
|
source: "static_pass",
|
|
260
277
|
passName: "compress_history",
|
|
261
|
-
description: `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
278
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
262
279
|
}
|
|
263
|
-
]
|
|
280
|
+
],
|
|
281
|
+
historyTokensTotal
|
|
264
282
|
};
|
|
265
283
|
}
|
|
266
284
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -1280,7 +1298,8 @@ function compile(ir, opts = {}) {
|
|
|
1280
1298
|
threshold: opts.toolRelevanceThreshold
|
|
1281
1299
|
});
|
|
1282
1300
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
1283
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
1301
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
1302
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
1284
1303
|
});
|
|
1285
1304
|
let workingIR = compressed.value;
|
|
1286
1305
|
const accumulatedMutations = [
|
|
@@ -1324,7 +1343,8 @@ function compile(ir, opts = {}) {
|
|
|
1324
1343
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1325
1344
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1326
1345
|
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
1327
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
1346
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
1347
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
1328
1348
|
};
|
|
1329
1349
|
const advisories = runAdvisor(
|
|
1330
1350
|
ir,
|
|
@@ -1431,7 +1451,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1431
1451
|
estimatedTokensIn: tokens,
|
|
1432
1452
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1433
1453
|
startedAt: Date.now(),
|
|
1434
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
1454
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
1455
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
1435
1456
|
});
|
|
1436
1457
|
}
|
|
1437
1458
|
async function record(input) {
|
|
@@ -1505,7 +1526,8 @@ function buildPayload(input, reg) {
|
|
|
1505
1526
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
1506
1527
|
cost_usd_actual: costUsdActual,
|
|
1507
1528
|
ttft_ms: input.ttftMs,
|
|
1508
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1529
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1530
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
1509
1531
|
};
|
|
1510
1532
|
}
|
|
1511
1533
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
package/dist/index.mjs
CHANGED
|
@@ -120,20 +120,37 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
120
120
|
]
|
|
121
121
|
};
|
|
122
122
|
}
|
|
123
|
+
function totalHistoryTokens(history) {
|
|
124
|
+
let total = 0;
|
|
125
|
+
for (const m of history) {
|
|
126
|
+
if (typeof m.content === "string") total += countTokens(m.content);
|
|
127
|
+
}
|
|
128
|
+
return total;
|
|
129
|
+
}
|
|
123
130
|
function passCompressHistory(ir, opts = {}) {
|
|
124
131
|
const history = ir.history;
|
|
125
|
-
if (!history || history.length === 0)
|
|
132
|
+
if (!history || history.length === 0) {
|
|
133
|
+
return { value: ir, mutations: [], historyTokensTotal: 0 };
|
|
134
|
+
}
|
|
126
135
|
const keepRecent = opts.keepRecent ?? 4;
|
|
127
136
|
const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
|
|
128
|
-
|
|
137
|
+
const summarizeAboveTokens = opts.summarizeAboveTokens;
|
|
138
|
+
const historyTokensTotal = totalHistoryTokens(history);
|
|
139
|
+
const countThresholdHit = history.length > summarizeOlderThan;
|
|
140
|
+
const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens && history.length > keepRecent;
|
|
141
|
+
if (!countThresholdHit && !tokenThresholdHit) {
|
|
142
|
+
return { value: ir, mutations: [], historyTokensTotal };
|
|
143
|
+
}
|
|
129
144
|
const cutIndex = history.length - keepRecent;
|
|
130
145
|
const old = history.slice(0, cutIndex);
|
|
131
146
|
const recent = history.slice(cutIndex);
|
|
132
147
|
const userTurns = old.filter((m) => m.role === "user");
|
|
133
148
|
const firstUserLine = userTurns[0]?.content.split("\n")[0]?.slice(0, 200) ?? "";
|
|
149
|
+
const oldTokens = totalHistoryTokens(old);
|
|
150
|
+
const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
|
|
134
151
|
const summary = {
|
|
135
152
|
role: "system",
|
|
136
|
-
content: `[Earlier conversation: ${old.length} turns omitted. First user message: "${firstUserLine}"]`
|
|
153
|
+
content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
|
|
137
154
|
};
|
|
138
155
|
return {
|
|
139
156
|
value: { ...ir, history: [summary, ...recent] },
|
|
@@ -142,9 +159,10 @@ function passCompressHistory(ir, opts = {}) {
|
|
|
142
159
|
id: `compress-history-${old.length}`,
|
|
143
160
|
source: "static_pass",
|
|
144
161
|
passName: "compress_history",
|
|
145
|
-
description: `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
162
|
+
description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
|
|
146
163
|
}
|
|
147
|
-
]
|
|
164
|
+
],
|
|
165
|
+
historyTokensTotal
|
|
148
166
|
};
|
|
149
167
|
}
|
|
150
168
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
@@ -762,7 +780,8 @@ function compile(ir, opts = {}) {
|
|
|
762
780
|
threshold: opts.toolRelevanceThreshold
|
|
763
781
|
});
|
|
764
782
|
const compressed = passCompressHistory(toolFiltered.value, {
|
|
765
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
783
|
+
summarizeOlderThan: opts.compressHistoryAfter,
|
|
784
|
+
summarizeAboveTokens: opts.compressHistoryAboveTokens
|
|
766
785
|
});
|
|
767
786
|
let workingIR = compressed.value;
|
|
768
787
|
const accumulatedMutations = [
|
|
@@ -806,7 +825,8 @@ function compile(ir, opts = {}) {
|
|
|
806
825
|
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
807
826
|
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
808
827
|
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
|
|
809
|
-
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens
|
|
828
|
+
historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
|
|
829
|
+
historyTokensTotal: compressed.historyTokensTotal
|
|
810
830
|
};
|
|
811
831
|
const advisories = runAdvisor(
|
|
812
832
|
ir,
|
|
@@ -913,7 +933,8 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
913
933
|
estimatedTokensIn: tokens,
|
|
914
934
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
915
935
|
startedAt: Date.now(),
|
|
916
|
-
historyCacheableTokens: result.diagnostics.historyCacheableTokens
|
|
936
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
937
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal
|
|
917
938
|
});
|
|
918
939
|
}
|
|
919
940
|
async function record(input) {
|
|
@@ -987,7 +1008,8 @@ function buildPayload(input, reg) {
|
|
|
987
1008
|
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
988
1009
|
cost_usd_actual: costUsdActual,
|
|
989
1010
|
ttft_ms: input.ttftMs,
|
|
990
|
-
history_cacheable_tokens: reg?.historyCacheableTokens
|
|
1011
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
1012
|
+
history_tokens_at_compile: reg?.historyTokensTotal
|
|
991
1013
|
};
|
|
992
1014
|
}
|
|
993
1015
|
function computeCostUsd(modelId, tokensIn, tokensOut) {
|
package/dist/{profiles-CQnLkQ7b.d.ts → profiles-B3eNQ2py.d.ts}
CHANGED
|
@@ -330,6 +330,14 @@ interface CompileResult {
|
|
|
330
330
|
* from history caching. alpha.5.
|
|
331
331
|
*/
|
|
332
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
333
341
|
};
|
|
334
342
|
}
|
|
335
343
|
/**
|
package/dist/{profiles-zm6diETo.d.mts → profiles-Py8c7zjJ.d.mts}
CHANGED
|
@@ -330,6 +330,14 @@ interface CompileResult {
|
|
|
330
330
|
* from history caching. alpha.5.
|
|
331
331
|
*/
|
|
332
332
|
historyCacheableTokens: number;
|
|
333
|
+
/**
|
|
334
|
+
* Total tokens in input `history` (pre-compression). Computed regardless
|
|
335
|
+
* of whether `passCompressHistory` fired — surfaces how close a tuple is
|
|
336
|
+
* to its `compressHistoryAboveTokens` threshold so dashboards / cost-
|
|
337
|
+
* watchers can see the bloat axis the count-based threshold misses.
|
|
338
|
+
* 0 when history is empty. alpha.7.
|
|
339
|
+
*/
|
|
340
|
+
historyTokensTotal: number;
|
|
333
341
|
};
|
|
334
342
|
}
|
|
335
343
|
/**
|
package/dist/profiles.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-zm6diETo.mjs';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Py8c7zjJ.mjs';
|
|
2
2
|
import './dialect.mjs';
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-CQnLkQ7b.js';
|
|
1
|
+
export { f as ALIASES, g as CacheStrategy, j as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-B3eNQ2py.js';
|
|
2
2
|
import './dialect.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@warmdrift/kgauto-compiler",
|
|
3
|
-
"version": "2.0.0-alpha.6",
|
|
3
|
+
"version": "2.0.0-alpha.7",
|
|
4
4
|
"description": "Prompt compiler + central learning brain for multi-model AI apps. Swap models without rewriting prompts.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|