opencode-lore 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gradient.ts +57 -7
- package/src/index.ts +9 -3
package/package.json
CHANGED
package/src/gradient.ts
CHANGED
|
@@ -51,6 +51,18 @@ let lastKnownLtm = 0;
|
|
|
51
51
|
let lastKnownSessionID: string | null = null;
|
|
52
52
|
let lastKnownMessageCount = 0;
|
|
53
53
|
|
|
54
|
+
// Number of messages in the most recent transform() output — i.e. how many
|
|
55
|
+
// messages were actually sent to the model. On layer 0 this equals the full
|
|
56
|
+
// session length. On layers 1-4 it equals the compressed window size.
|
|
57
|
+
// Calibration must use this count (not the total DB message count) so that
|
|
58
|
+
// the delta on the next turn reflects only messages added since the last
|
|
59
|
+
// compressed window, not since the last DB snapshot.
|
|
60
|
+
let lastTransformedCount = 0;
|
|
61
|
+
|
|
62
|
+
export function getLastTransformedCount(): number {
|
|
63
|
+
return lastTransformedCount;
|
|
64
|
+
}
|
|
65
|
+
|
|
54
66
|
// --- Force escalation ---
|
|
55
67
|
// Set when the API returns "prompt is too long" — forces the transform to skip
|
|
56
68
|
// layer 0 (and optionally layer 1) on the next call to ensure the context is
|
|
@@ -139,6 +151,7 @@ export function resetCalibration() {
|
|
|
139
151
|
lastKnownLtm = 0;
|
|
140
152
|
lastKnownSessionID = null;
|
|
141
153
|
lastKnownMessageCount = 0;
|
|
154
|
+
lastTransformedCount = 0;
|
|
142
155
|
forceMinLayer = 0;
|
|
143
156
|
}
|
|
144
157
|
|
|
@@ -691,7 +704,7 @@ export function needsUrgentDistillation(): boolean {
|
|
|
691
704
|
return v;
|
|
692
705
|
}
|
|
693
706
|
|
|
694
|
-
export function transform(input: {
|
|
707
|
+
function transformInner(input: {
|
|
695
708
|
messages: MessageWithParts[];
|
|
696
709
|
projectPath: string;
|
|
697
710
|
sessionID?: string;
|
|
@@ -722,8 +735,27 @@ export function transform(input: {
|
|
|
722
735
|
const maxInput = contextLimit - outputReserved;
|
|
723
736
|
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
724
737
|
|
|
738
|
+
// True when we have real API token data from a previous turn in this session.
|
|
739
|
+
// When false (first turn / session change), chars/4 estimates can undercount by
|
|
740
|
+
// up to 1.8x — so tryFit output must be validated with a safety multiplier before
|
|
741
|
+
// being used, to prevent sending an apparently-fitting window that actually overflows.
|
|
742
|
+
const calibrated = lastKnownInput > 0 && sid === lastKnownSessionID;
|
|
743
|
+
|
|
744
|
+
// On uncalibrated turns, apply this multiplier to tryFit's estimated total to
|
|
745
|
+
// approximate the real token count. 1.5 is conservative but not so aggressive
|
|
746
|
+
// that it forces layer 4 on modestly-sized sessions.
|
|
747
|
+
const UNCALIBRATED_SAFETY = 1.5;
|
|
748
|
+
|
|
749
|
+
// Returns true if the tryFit result is safe to use: either we have calibrated
|
|
750
|
+
// data (exact) or the estimated total * safety factor fits within maxInput.
|
|
751
|
+
function fitsWithSafetyMargin(result: { totalTokens: number } | null): boolean {
|
|
752
|
+
if (!result) return false;
|
|
753
|
+
if (calibrated) return true;
|
|
754
|
+
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
755
|
+
}
|
|
756
|
+
|
|
725
757
|
let expectedInput: number;
|
|
726
|
-
if (
|
|
758
|
+
if (calibrated) {
|
|
727
759
|
// Exact approach: prior API count + estimate of only the new messages.
|
|
728
760
|
const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
|
|
729
761
|
const newMsgTokens = newMsgCount > 0
|
|
@@ -793,7 +825,7 @@ export function transform(input: {
|
|
|
793
825
|
rawBudget,
|
|
794
826
|
strip: "none",
|
|
795
827
|
});
|
|
796
|
-
if (layer1) return { ...layer1, layer: 1, usable, distilledBudget, rawBudget };
|
|
828
|
+
if (fitsWithSafetyMargin(layer1)) return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
|
|
797
829
|
}
|
|
798
830
|
|
|
799
831
|
// Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
|
|
@@ -812,9 +844,9 @@ export function transform(input: {
|
|
|
812
844
|
strip: "old-tools",
|
|
813
845
|
protectedTurns: 2,
|
|
814
846
|
});
|
|
815
|
-
if (layer2) {
|
|
847
|
+
if (fitsWithSafetyMargin(layer2)) {
|
|
816
848
|
urgentDistillation = true;
|
|
817
|
-
return { ...layer2, layer: 2, usable, distilledBudget, rawBudget };
|
|
849
|
+
return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
|
|
818
850
|
}
|
|
819
851
|
}
|
|
820
852
|
|
|
@@ -833,9 +865,9 @@ export function transform(input: {
|
|
|
833
865
|
rawBudget: Math.floor(usable * 0.55),
|
|
834
866
|
strip: "all-tools",
|
|
835
867
|
});
|
|
836
|
-
if (layer3) {
|
|
868
|
+
if (fitsWithSafetyMargin(layer3)) {
|
|
837
869
|
urgentDistillation = true;
|
|
838
|
-
return { ...layer3, layer: 3, usable, distilledBudget, rawBudget };
|
|
870
|
+
return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
|
|
839
871
|
}
|
|
840
872
|
|
|
841
873
|
// Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.
|
|
@@ -871,6 +903,24 @@ export function transform(input: {
|
|
|
871
903
|
};
|
|
872
904
|
}
|
|
873
905
|
|
|
906
|
+
// Public wrapper: records the compressed message count for calibration.
|
|
907
|
+
// Calibration needs to know how many messages were SENT to the model (the
|
|
908
|
+
// compressed window), not the total DB count. On layer 0 these are equal;
|
|
909
|
+
// on layers 1-4 the compressed window is smaller, and the delta on the next
|
|
910
|
+
// turn must be computed relative to the compressed count — otherwise the
|
|
911
|
+
// expected input on the next turn is anchored to the compressed input token
|
|
912
|
+
// count but the "new messages" delta is computed against the full DB count,
|
|
913
|
+
// making newMsgCount ≈ 0 and causing layer 0 passthrough on an overflowing session.
|
|
914
|
+
export function transform(input: {
|
|
915
|
+
messages: MessageWithParts[];
|
|
916
|
+
projectPath: string;
|
|
917
|
+
sessionID?: string;
|
|
918
|
+
}): TransformResult {
|
|
919
|
+
const result = transformInner(input);
|
|
920
|
+
lastTransformedCount = result.messages.length;
|
|
921
|
+
return result;
|
|
922
|
+
}
|
|
923
|
+
|
|
874
924
|
// Compute our message-only estimate for a set of messages (for calibration use)
|
|
875
925
|
export function estimateMessages(messages: MessageWithParts[]): number {
|
|
876
926
|
return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
|
package/src/index.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
setLtmTokens,
|
|
15
15
|
getLtmBudget,
|
|
16
16
|
setForceMinLayer,
|
|
17
|
+
getLastTransformedCount,
|
|
17
18
|
} from "./gradient";
|
|
18
19
|
import { formatKnowledge } from "./prompt";
|
|
19
20
|
import { createRecallTool } from "./reflect";
|
|
@@ -219,7 +220,11 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
219
220
|
const msgEstimate = estimateMessages(withParts);
|
|
220
221
|
const actualInput =
|
|
221
222
|
msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
|
|
222
|
-
|
|
223
|
+
// Use the compressed message count (from the last transform output),
|
|
224
|
+
// not the total DB count. On layer 0 these are equal. On layers 1-4,
|
|
225
|
+
// the model only saw the compressed window — calibrate must track that
|
|
226
|
+
// count so the next turn's delta is computed correctly.
|
|
227
|
+
calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount() || withParts.length);
|
|
223
228
|
}
|
|
224
229
|
}
|
|
225
230
|
}
|
|
@@ -389,12 +394,13 @@ export const LorePlugin: Plugin = async (ctx) => {
|
|
|
389
394
|
// Layer 0 means all messages fit within the context budget — leave them alone
|
|
390
395
|
// so the append-only sequence stays intact for prompt caching.
|
|
391
396
|
if (result.layer > 0) {
|
|
397
|
+
// The API requires the conversation to end with a user message.
|
|
398
|
+
// Always drop trailing non-user messages — even assistant messages with
|
|
399
|
+
// tool parts. A hard API error is worse than the model re-invoking a tool.
|
|
392
400
|
while (
|
|
393
401
|
result.messages.length > 0 &&
|
|
394
402
|
result.messages.at(-1)!.info.role !== "user"
|
|
395
403
|
) {
|
|
396
|
-
const last = result.messages.at(-1)!;
|
|
397
|
-
if (last.parts.some((p) => p.type === "tool")) break;
|
|
398
404
|
const dropped = result.messages.pop()!;
|
|
399
405
|
console.error(
|
|
400
406
|
"[lore] WARN: dropping trailing",
|