pi-smart-compact 7.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
+ /**
+  * Pre-compaction redundancy pruning — deterministic, zero LLM cost.
+  * Reduces compaction input by collapsing redundant message sequences.
+  */
+
+ import type { LlmMessage, ToolCallBlock } from "../types.ts";
+ import { extractText } from "./extraction.ts";
+ import { estimateTokens } from "./tokens.ts";
+
+ export interface PruningResult {
+   messages: LlmMessage[];
+   prunedCount: number;
+   prunedTokenSaving: number;
+   reasons: Array<{ count: number; reason: string }>;
+ }
+
+ // Pattern for agent acknowledgment messages with no information
+ const ACK_RE = /^(?:I'?ll |let me |sure|ok[,.]?|got it|i understand|i see|now i|next,? i|alright|great|perfect|sounds good|i can|i will|checking|looking|right away)/i;
+
+ // Maximum chars to keep from a tool result output
+ const MAX_TOOL_OUTPUT_CHARS = 800;
+
+ /**
+  * Build a quick index of assistant tool calls for lookups.
+  */
+ function buildToolCallIndex(msgs: LlmMessage[]): Map<string, { name: string; args: Record<string, unknown>; msgIndex: number }> {
+   const idx = new Map<string, { name: string; args: Record<string, unknown>; msgIndex: number }>();
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "assistant") continue;
+     for (const b of (msgs[i].content ?? []) as unknown[]) {
+       const block = b as ToolCallBlock;
+       if (block?.type === "toolCall" && block.id) {
+         idx.set(block.id, { name: block.name, args: block.arguments, msgIndex: i });
+       }
+     }
+   }
+   return idx;
+ }
+
+ /**
+  * Detect and collapse redundant message sequences.
+  */
+ export function pruneRedundant(msgs: LlmMessage[]): PruningResult {
+   if (msgs.length < 5) return { messages: msgs, prunedCount: 0, prunedTokenSaving: 0, reasons: [] };
+
+   const tcIdx = buildToolCallIndex(msgs);
+   const keep = new Set<number>(msgs.map((_, i) => i));
+   const reasonMap = new Map<string, number>();
+
+   // ── 1. Duplicate file reads: keep only last read per file ──
+   const readIndices = new Map<string, number[]>(); // filepath → [indices of toolResult]
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "toolResult") continue;
+     const tc = tcIdx.get(msgs[i].toolCallId ?? "");
+     if (!tc || tc.name !== "read") continue;
+     const fp = (tc.args?.path ?? tc.args?.file_path) as string | undefined;
+     if (!fp) continue;
+     const arr = readIndices.get(fp) ?? [];
+     arr.push(i);
+     readIndices.set(fp, arr);
+   }
+   for (const [, indices] of readIndices) {
+     // Keep last read, prune the rest
+     for (let j = 0; j < indices.length - 1; j++) {
+       keep.delete(indices[j]);
+       // Also prune the corresponding assistant tool call message
+       const tc = tcIdx.get(msgs[indices[j]].toolCallId ?? "");
+       if (tc) keep.delete(tc.msgIndex);
+     }
+     if (indices.length > 1) {
+       reasonMap.set("Duplicate file reads", (reasonMap.get("Duplicate file reads") ?? 0) + indices.length - 1);
+     }
+   }
+
+   // ── 2. Repeated-failure chains: keep only the first and last failure of each run ──
+   const failedToolResults: Array<{ index: number; tool: string; tcIndex: number }> = [];
+   for (let i = 0; i < msgs.length; i++) {
+     if (msgs[i].role !== "toolResult" || !msgs[i].isError) continue;
+     const tc = tcIdx.get(msgs[i].toolCallId ?? "");
+     failedToolResults.push({ index: i, tool: tc?.name ?? "unknown", tcIndex: tc?.msgIndex ?? -1 });
+   }
+   // Group failures of the same tool that occur within 10 messages of each other
+   let i = 0;
+   while (i < failedToolResults.length) {
+     const tool = failedToolResults[i].tool;
+     let j = i + 1;
+     while (j < failedToolResults.length && failedToolResults[j].tool === tool && failedToolResults[j].index - failedToolResults[j - 1].index < 10) {
+       j++;
+     }
+     // If 3+ consecutive failures of same tool, keep only first and last
+     if (j - i >= 3) {
+       for (let k = i + 1; k < j - 1; k++) {
+         keep.delete(failedToolResults[k].index);
+         if (failedToolResults[k].tcIndex >= 0) keep.delete(failedToolResults[k].tcIndex);
+       }
+       reasonMap.set("Collapsed error chains", (reasonMap.get("Collapsed error chains") ?? 0) + (j - i - 2));
+     }
+     i = j;
+   }
+
+   // ── 3. Agent acknowledgment messages: no informational content ──
+   for (let idx = 0; idx < msgs.length; idx++) {
+     if (msgs[idx].role !== "assistant") continue;
+     const blocks = (msgs[idx].content ?? []) as unknown[];
+     // Only consider messages that are pure text with no tool calls
+     const hasToolCall = blocks.some((b) => (b as ToolCallBlock)?.type === "toolCall");
+     if (hasToolCall) continue;
+     const text = extractText(msgs[idx].content).trim();
+     if (text.length > 0 && text.length < 100 && ACK_RE.test(text)) {
+       keep.delete(idx);
+       reasonMap.set("Agent acknowledgments", (reasonMap.get("Agent acknowledgments") ?? 0) + 1);
+     }
+   }
+
+   // ── 4. Truncate long tool result outputs ──
+   // (Applied as content modification, not message removal)
+   const kept = msgs.map((m, idx) => {
+     if (!keep.has(idx)) return null;
+     if (m.role !== "toolResult") return m;
+     const text = extractText(m.content);
+     if (text.length > MAX_TOOL_OUTPUT_CHARS) {
+       // Keep the first and last MAX_TOOL_OUTPUT_CHARS / 2 chars around a truncation marker
+       const half = MAX_TOOL_OUTPUT_CHARS / 2;
+       const head = text.slice(0, half);
+       const tail = text.slice(-half);
+       const truncated = head + "\n... [truncated " + (text.length - MAX_TOOL_OUTPUT_CHARS) + " chars] ...\n" + tail;
+       return { ...m, content: [{ type: "text" as const, text: truncated }] };
+     }
+     return m;
+   });
+
+   // Build final message list, preserving order
+   const finalMsgs = kept.filter((m): m is LlmMessage => m !== null);
+   const prunedCount = msgs.length - finalMsgs.length;
+
+   // Estimate token saving
+   const originalTokens = estimateTokens(msgs.map(m => extractText(m.content)).join(""));
+   const prunedTokens = estimateTokens(finalMsgs.map(m => extractText(m.content)).join(""));
+
+   const reasons = [...reasonMap.entries()].map(([reason, count]) => ({ count, reason }));
+
+   return {
+     messages: finalMsgs,
+     prunedCount,
+     prunedTokenSaving: Math.max(0, originalTokens - prunedTokens),
+     reasons,
+   };
+ }
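
For orientation, here is a minimal usage sketch (not part of the package) showing the pruner on an invented transcript; the LlmMessage field shapes (role, content, toolCallId) are inferred from the code above:

const transcript: LlmMessage[] = [
  { role: "assistant", content: [{ type: "toolCall", id: "t1", name: "read", arguments: { path: "src/app.ts" } }] },
  { role: "toolResult", toolCallId: "t1", content: [{ type: "text", text: "export const app = 1;" }] },
  { role: "assistant", content: [{ type: "text", text: "Got it, reading that file once more." }] },
  { role: "assistant", content: [{ type: "toolCall", id: "t2", name: "read", arguments: { path: "src/app.ts" } }] },
  { role: "toolResult", toolCallId: "t2", content: [{ type: "text", text: "export const app = 1;" }] },
];

const result = pruneRedundant(transcript);
// The earlier read pair and the bare acknowledgment are dropped: prunedCount === 3, and
// reasons is [{ count: 1, reason: "Duplicate file reads" }, { count: 1, reason: "Agent acknowledgments" }]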
@@ -0,0 +1,63 @@
+ /**
+  * Token estimation with provider-specific ratios and EMA calibration.
+  */
+
+ import { CHARS_PER_TOKEN } from "../constants.ts";
+ import type { ProviderCapabilities } from "../types.ts";
+
+ const PROVIDER_MAP: Record<string, ProviderCapabilities> = {
+   "zai-anthropic": {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "high",
+     instructionFollowing: "high", tokenRatioEstimate: 3.5, concurrencyLimit: 3,
+     cacheStrategy: "anthropic",
+   },
+   "minimax": {
+     maxOutputTokens: 4096, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.8, concurrencyLimit: 2,
+     cacheStrategy: "anthropic",
+   },
+   "xiaomi-token-plan": {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.3, concurrencyLimit: 2,
+     cacheStrategy: "openai",
+   },
+   "openai": {
+     maxOutputTokens: 16384, supportsTools: true, jsonReliability: "high",
+     instructionFollowing: "high", tokenRatioEstimate: 4.0, concurrencyLimit: 5,
+     cacheStrategy: "openai",
+   },
+ };
+
+ export function getProviderCaps(provider: string): ProviderCapabilities {
+   return PROVIDER_MAP[provider] ?? {
+     maxOutputTokens: 8192, supportsTools: "probe", jsonReliability: "medium",
+     instructionFollowing: "medium", tokenRatioEstimate: 3.8, concurrencyLimit: 2,
+     cacheStrategy: "none",
+   };
+ }
+
+ // Calibration factors keyed by provider, so one provider's feedback can't bleed into another's.
+ const _calibrationFactors = new Map<string, number>();
+
+ function getCalibrationFactor(provider?: string): number {
+   if (!provider) return 1.0;
+   return _calibrationFactors.get(provider) ?? 1.0;
+ }
+
+ export function estimateTokens(text: string, provider?: string): number {
+   const baseRatio = provider ? getProviderCaps(provider).tokenRatioEstimate : CHARS_PER_TOKEN;
+   // JSON content tokenizes more densely (brackets, quotes, escapes): fewer chars per token
+   const jsonPenalty = text.startsWith("[") || text.startsWith("{") ? 0.85 : 1.0;
+   // Turkish characters are multi-byte in some tokenizers, also lowering chars per token
+   const langPenalty = /[çğıöşüÇĞİÖŞÜ]/.test(text) ? 0.9 : 1.0;
+   const calibration = getCalibrationFactor(provider);
+   // Penalties shrink the effective chars-per-token ratio, which raises the token estimate
+   return Math.ceil((text.length / (baseRatio * jsonPenalty * langPenalty)) * calibration);
+ }
+
+ export function calibrateFromResponse(estimated: number, actual: number, provider?: string): void {
+   if (actual > 0 && estimated > 0 && provider) {
+     const prev = _calibrationFactors.get(provider) ?? 1.0;
+     const sample = actual / estimated;
+     _calibrationFactors.set(provider, prev * 0.7 + sample * 0.3); // EMA smoothing
+   }
+ }
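
And a sketch of the intended calibration loop; the request/response plumbing is elided, and the usage field name is hypothetical since each provider reports token counts differently:

const prompt = JSON.stringify({ role: "user", text: "Merhaba dünya" }); // triggers both the JSON and Turkish adjustments
const estimated = estimateTokens(prompt, "minimax");

// ... send the request, then read the provider-reported input token count ...
const actual = 42; // e.g. from response.usage.input_tokens (field name varies by provider)
calibrateFromResponse(estimated, actual, "minimax");

// The EMA (70% previous factor, 30% new sample) nudges future estimates for "minimax"
// toward the provider's real tokenizer without letting one outlier response dominate.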