@grapine.ai/contextprune 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +426 -1
- package/dist/cli/commands/analyze.d.ts +2 -0
- package/dist/cli/commands/analyze.js +161 -0
- package/dist/cli/commands/compress.d.ts +2 -0
- package/dist/cli/commands/compress.js +65 -0
- package/dist/cli/commands/watch.d.ts +2 -0
- package/dist/cli/commands/watch.js +432 -0
- package/dist/cli/dashboard/index.html +720 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +19 -0
- package/dist/cli/labels.d.ts +4 -0
- package/dist/cli/labels.js +35 -0
- package/dist/cli/parse-input.d.ts +33 -0
- package/dist/cli/parse-input.js +191 -0
- package/dist/src/brief/index.d.ts +2 -0
- package/dist/src/brief/index.js +101 -0
- package/dist/src/classifier/confidence.d.ts +4 -0
- package/dist/src/classifier/confidence.js +23 -0
- package/dist/src/classifier/index.d.ts +11 -0
- package/dist/src/classifier/index.js +217 -0
- package/dist/src/classifier/patterns.d.ts +7 -0
- package/dist/src/classifier/patterns.js +81 -0
- package/dist/src/compression/engine.d.ts +23 -0
- package/dist/src/compression/engine.js +363 -0
- package/dist/src/index.d.ts +41 -0
- package/dist/src/index.js +120 -0
- package/dist/src/pipeline/index.d.ts +5 -0
- package/dist/src/pipeline/index.js +167 -0
- package/dist/src/scorer/index.d.ts +4 -0
- package/dist/src/scorer/index.js +136 -0
- package/dist/src/scorer/session-extractor.d.ts +2 -0
- package/dist/src/scorer/session-extractor.js +57 -0
- package/dist/src/strategy/selector.d.ts +3 -0
- package/dist/src/strategy/selector.js +158 -0
- package/dist/src/tokenizer/index.d.ts +18 -0
- package/dist/src/tokenizer/index.js +195 -0
- package/dist/src/types.d.ts +161 -0
- package/dist/src/types.js +5 -0
- package/dist/src/utils/index.d.ts +4 -0
- package/dist/src/utils/index.js +48 -0
- package/dist/src/validation/coherence.d.ts +3 -0
- package/dist/src/validation/coherence.js +87 -0
- package/license.md +14 -0
- package/package.json +77 -41
- package/screenshots/cp_dashboard_compression.jpg +0 -0
- package/screenshots/cp_dashboard_healthy.jpg +0 -0
- package/index.js +0 -1
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// src/pipeline/index.ts
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.runPipeline = runPipeline;
|
|
5
|
+
const classifier_1 = require("../classifier");
|
|
6
|
+
const scorer_1 = require("../scorer");
|
|
7
|
+
const session_extractor_1 = require("../scorer/session-extractor");
|
|
8
|
+
const selector_1 = require("../strategy/selector");
|
|
9
|
+
const engine_1 = require("../compression/engine");
|
|
10
|
+
const coherence_1 = require("../validation/coherence");
|
|
11
|
+
const utils_1 = require("../utils");
|
|
12
|
+
const brief_1 = require("../brief");
|
|
13
|
+
/**
 * Runs the full context-compression pipeline over a conversation.
 *
 * Stages: (1) classify every message, (2) extract session state and score
 * relevance, (3) select a compression strategy per message, (4) optionally
 * compress, (5) validate coherence and roll back blocked removals.
 *
 * @param messages Conversation messages in original order.
 * @param model    Model identifier, forwarded to classifier/session extractor/engine.
 * @param options  Optional settings: compressionMode ('manual' | 'auto' |
 *                 'suggest-only'), warningThreshold, actualInputTokens,
 *                 useLLMSummarization.
 * @returns Promise resolving to { compressed, analysis }.
 */
async function runPipeline(messages, model, options = {}) {
    const sessionId = (0, utils_1.generateId)();
    // Stage 1: Classify
    const annotated = await (0, classifier_1.classifyAll)(messages, model);
    // Stage 2: Extract session state + score
    const sessionState = (0, session_extractor_1.extractSessionState)(annotated, model, options.actualInputTokens);
    const scored = (0, scorer_1.scoreAll)(annotated, sessionState);
    // Stage 3: Select strategies
    // manual mode = always compress; use 0.70 so the strategy table fires for stale
    // content but does NOT escalate fresh active tool outputs (escalation needs >= 0.80)
    const effectiveUtilization = options.compressionMode === 'manual'
        ? Math.max(sessionState.tokenBudget.utilizationPercent, 0.70)
        : sessionState.tokenBudget.utilizationPercent;
    const withStrategies = (0, selector_1.selectAllStrategies)(scored, effectiveUtilization);
    // Stage 4: Compress — skip if suggest-only or auto mode hasn't hit threshold
    const suggestOnly = options.compressionMode === 'suggest-only';
    const utilization = sessionState.tokenBudget.utilizationPercent;
    const warningThreshold = options.warningThreshold ?? 0.65;
    const autoSkip = options.compressionMode === 'auto' && utilization < warningThreshold;
    const compressionResult = (suggestOnly || autoSkip)
        ? null
        : await (0, engine_1.compress)(withStrategies, model, options.useLLMSummarization ?? false);
    // Stage 5: Validate (only when compression ran)
    let finalMessages = messages;
    if (compressionResult) {
        const validation = (0, coherence_1.validate)(annotated, compressionResult.compressed, compressionResult.event.decisions, sessionState);
        // On validation failure, restore the blocked messages rather than
        // shipping an incoherent transcript.
        finalMessages = validation.passed
            ? compressionResult.compressed
            : (0, coherence_1.restoreMessages)(annotated, compressionResult.compressed, validation.blockers);
    }
    const tokensBefore = compressionResult?.event.tokensBefore
        ?? annotated.reduce((s, m) => s + m.tokenCount, 0);
    // When compression didn't run, estimate savings from selected strategies
    // NOTE(review): projectedSaved is computed unconditionally but only consumed
    // when compressionResult is null — harmless, just redundant work.
    const projectedSaved = withStrategies.reduce((sum, msg) => {
        switch (msg.compressionStrategy) {
            case 'DROP':
            case 'DEDUPLICATE': return sum + msg.tokenCount;
            case 'COLLAPSE_TO_MARKER': return sum + Math.max(0, msg.tokenCount - 15);
            case 'EXTRACT_RESULT': return sum + Math.floor(msg.tokenCount * 0.5);
            case 'SUMMARIZE': return sum + Math.floor(msg.tokenCount * 0.65);
            default: return sum;
        }
    }, 0);
    const tokensSaved = compressionResult?.event.tokensSaved ?? projectedSaved;
    const tokensAfter = compressionResult?.event.tokensAfter ?? (tokensBefore - tokensSaved);
    // Assemble outputs
    const compressed = {
        messages: finalMessages,
        summary: {
            tokensBefore,
            tokensAfter,
            tokensSaved,
            savingsPercent: tokensBefore > 0 ? tokensSaved / tokensBefore : 0,
            messagesRemoved: messages.length - finalMessages.length,
            compressionTriggered: compressionResult !== null,
        },
        sessionId,
    };
    const decisions = compressionResult?.event.decisions ?? [];
    const sessionBrief = (0, brief_1.generateSessionBrief)(annotated);
    const analysis = {
        sessionId,
        annotatedMessages: annotated,
        breakdown: buildBreakdown(annotated),
        topConsumers: buildTopConsumers(annotated),
        recommendation: buildRecommendation(sessionState, { tokensBefore, tokensSaved }),
        sessionState,
        sessionBrief,
        // Single synthetic history entry describing this run; decisions is empty
        // when compression was skipped.
        compressionHistory: [{
                eventId: (0, utils_1.generateId)(),
                timestamp: new Date(),
                trigger: 'api_call',
                messagesEvaluated: annotated.length,
                messagesCompressed: decisions.filter(d => d.strategy !== 'PRESERVE').length,
                messagesDropped: decisions.filter(d => d.strategy === 'DROP' || d.strategy === 'DEDUPLICATE').length,
                messagesPreserved: decisions.filter(d => d.strategy === 'PRESERVE').length,
                tokensBefore,
                tokensAfter,
                tokensSaved,
                savingsPercent: tokensBefore > 0 ? tokensSaved / tokensBefore : 0,
                decisions,
            }],
    };
    return { compressed, analysis };
}
|
|
98
|
+
// ─── Output Builders ──────────────────────────────────────────────────────────
|
|
99
|
+
/**
 * Aggregates token statistics per classification and per chosen strategy.
 * Returns { byClassification, byCompressionStrategy }.
 */
function buildBreakdown(annotated) {
    const byClassification = {};
    const byCompressionStrategy = {};
    const totalTokens = annotated.reduce((acc, item) => acc + item.tokenCount, 0);
    const denominator = totalTokens || 1; // avoid division by zero on empty input
    annotated.forEach((item) => {
        const cls = (byClassification[item.classification] ??=
            { count: 0, tokens: 0, percentOfTotal: 0 });
        cls.count += 1;
        cls.tokens += item.tokenCount;
        cls.percentOfTotal = cls.tokens / denominator;
        const strat = (byCompressionStrategy[item.compressionStrategy] ??=
            { count: 0, tokensBefore: 0, tokensAfter: 0 });
        strat.count += 1;
        strat.tokensBefore += item.tokenCount;
        // Fall back to the original size when no compressed size was recorded.
        strat.tokensAfter += item.compressedTokenCount ?? item.tokenCount;
    });
    return { byClassification, byCompressionStrategy };
}
|
|
120
|
+
/**
 * Returns the ten largest messages by token count, each annotated with a
 * compression-opportunity label and a human-readable reason.
 */
function buildTopConsumers(annotated) {
    const totalTokens = annotated.reduce((acc, m) => acc + m.tokenCount, 0);
    const safeTotal = totalTokens || 1; // guard against empty conversations
    const ranked = [...annotated].sort((left, right) => right.tokenCount - left.tokenCount);
    return ranked.slice(0, 10).map((msg) => ({
        messageId: msg.id,
        originalIndex: msg.originalIndex,
        classification: msg.classification,
        tokenCount: msg.tokenCount,
        percentOfTotal: msg.tokenCount / safeTotal,
        compressionOpportunity: getOpportunity(msg.compressionStrategy),
        reason: getOpportunityReason(msg.classification),
    }));
}
|
|
135
|
+
/** Maps a compression strategy onto a coarse savings-opportunity label. */
function getOpportunity(strategy) {
    switch (strategy) {
        case 'PRESERVE':
            return 'none';
        case 'DROP':
        case 'EXTRACT_RESULT':
            return 'high';
        case 'COLLAPSE_TO_MARKER':
        case 'SUMMARIZE':
            return 'medium';
        default:
            return 'low';
    }
}
|
|
144
|
+
/** Human-readable rationale for why a message classification is compressible. */
function getOpportunityReason(classification) {
    const lookup = new Map([
        ['TOOL_OUTPUT_STALE', 'Stale tool output — no longer referenced'],
        ['ERROR_RESOLVED', 'Resolved error — stack trace no longer needed'],
        ['REASONING_INTERMEDIATE', 'Intermediate reasoning — conclusion already reached'],
        ['PROGRESS_MARKER', 'Completed step — collapsible to single line'],
        ['CONVERSATIONAL', 'General exchange — low active relevance'],
    ]);
    return lookup.get(classification) ?? 'Low relevance score';
}
|
|
154
|
+
/**
 * Translates token-budget utilization and projected savings into an
 * actionable recommendation with an urgency level and message.
 */
function buildRecommendation(sessionState, result) {
    const util = sessionState.tokenBudget.utilizationPercent;
    const savings = result.tokensBefore > 0 ? result.tokensSaved / result.tokensBefore : 0;
    const utilPct = Math.round(util * 100);
    const savingsPct = Math.round(savings * 100);
    const make = (shouldCompress, urgency, message) => ({
        shouldCompress,
        urgency,
        projectedSavings: result.tokensSaved,
        projectedSavingsPercent: savings,
        message,
    });
    if (util < 0.50) {
        return make(false, 'none', `Context is ${utilPct}% full. No compression needed yet.`);
    }
    if (util < 0.65) {
        return make(false, 'suggested', `Context is ${utilPct}% full. Compression available but not urgent.`);
    }
    if (util < 0.80) {
        return make(true, 'recommended', `Context is ${utilPct}% full. Recommend compressing — ${savingsPct}% savings available.`);
    }
    return make(true, 'critical', `Context is ${utilPct}% full. Compress now — model quality is degrading. ${savingsPct}% savings available.`);
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { AnnotatedMessage, SessionState, RelevanceTier } from '../types';
/** Buckets a continuous 0–1 relevance score into a discrete tier. */
export declare function scoreTierFromScore(score: number): RelevanceTier;
/** Computes the weighted composite relevance score (0–1) for one message. */
export declare function scoreRelevance(message: AnnotatedMessage, allMessages: AnnotatedMessage[], sessionState: SessionState): number;
/** Scores every message, mutating each in place, and returns the same array. */
export declare function scoreAll(messages: AnnotatedMessage[], sessionState: SessionState): AnnotatedMessage[];
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// src/scorer/index.ts
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.scoreTierFromScore = scoreTierFromScore;
|
|
5
|
+
exports.scoreRelevance = scoreRelevance;
|
|
6
|
+
exports.scoreAll = scoreAll;
|
|
7
|
+
const utils_1 = require("../utils");
|
|
8
|
+
const classifier_1 = require("../classifier");
|
|
9
|
+
// ─── Type Weight Table ────────────────────────────────────────────────────────
|
|
10
|
+
// Base relevance weight per classification: pinned/critical classes at 1.0,
// stale/intermediate content near the bottom.
const TYPE_WEIGHTS = {
    SYSTEM_CONSTRAINT: 1.0,
    USER_CORRECTION: 1.0,
    TASK_DEFINITION: 1.0,
    ERROR_ACTIVE: 1.0,
    TOOL_OUTPUT_ACTIVE: 0.80,
    DECISION_FINAL: 0.75,
    PROGRESS_MARKER: 0.50,
    CONVERSATIONAL: 0.45,
    TOOL_OUTPUT_STALE: 0.20,
    ERROR_RESOLVED: 0.15,
    REASONING_INTERMEDIATE: 0.20,
};
// Mixing weights for the composite score in scoreRelevance; they sum to 1.0,
// so the composite stays within [0, 1] when each factor is within [0, 1].
const WEIGHTS = {
    recency: 0.25,
    references: 0.30,
    taskProximity: 0.20,
    typeWeight: 0.25,
};
|
|
29
|
+
// ─── Factor: Recency ──────────────────────────────────────────────────────────
|
|
30
|
+
/**
 * Recency factor in [0, 1]: newest message scores 1.0 and older messages
 * decay asymptotically. Pinned classifications never decay.
 */
function recencyScore(messageIndex, totalMessages, classification) {
    if ((0, classifier_1.isPinned)(classification)) {
        return 1.0;
    }
    // Asymptotic decay: 1 / (1 + k * turnsFromEnd)
    const DECAY_RATE = 0.15;
    const age = totalMessages - 1 - messageIndex;
    return 1 / (1 + DECAY_RATE * age);
}
|
|
38
|
+
// ─── Factor: References ───────────────────────────────────────────────────────
|
|
39
|
+
/**
 * Heuristically decides whether `later` refers back to `source`, either by
 * an exact tool-use ID match or by keyword overlap (short messages only).
 */
function directlyReferences(source, later) {
    // Method 1: Tool use ID (exact)
    const blocks = Array.isArray(source.original.content)
        ? source.original.content
        : [];
    const tagged = blocks.find((block) => block.id || block.tool_use_id);
    if (tagged) {
        const toolId = tagged.id ?? tagged.tool_use_id;
        if (toolId && JSON.stringify(later.original.content).includes(toolId)) {
            return true;
        }
    }
    // Method 2: Keyword overlap for short messages
    if (source.tokenCount < 200) {
        const fromKeywords = (0, utils_1.extractKeywords)((0, utils_1.extractText)(source.original));
        const toKeywords = (0, utils_1.extractKeywords)((0, utils_1.extractText)(later.original));
        const shared = fromKeywords.filter((word) => toKeywords.includes(word)).length;
        // >35% of the source's keywords reappearing later counts as a reference.
        if (shared / Math.max(fromKeywords.length, 1) > 0.35) {
            return true;
        }
    }
    return false;
}
|
|
61
|
+
/** Counts how many subsequent messages point back at this one. */
function countDirectReferences(message, allMessages) {
    let total = 0;
    for (const candidate of allMessages.slice(message.originalIndex + 1)) {
        if (directlyReferences(message, candidate)) {
            total += 1;
        }
    }
    return total;
}
|
|
65
|
+
/**
 * Reference factor in [0, 1]: how strongly later messages point back at this
 * one. The newest message defaults to 1.0; messages more than 40 turns from
 * the end are assumed unreferenced (fast path) and score 0.
 */
function referenceScore(message, allMessages) {
    const later = allMessages.slice(message.originalIndex + 1);
    if (later.length === 0)
        return 1.0; // most recent message — assumed highly relevant
    // Fast path: skip expensive check for very old messages
    // NOTE(review): this is length - index (not length - 1 - index as in
    // recencyScore), so the cutoff effectively triggers at 39 turns of
    // trailing messages — confirm the 40-turn boundary is intended this way.
    const turnsFromEnd = allMessages.length - message.originalIndex;
    if (turnsFromEnd > 40)
        return 0.0;
    const refCount = countDirectReferences(message, allMessages);
    if (refCount === 0)
        return 0.0;
    // Base of 0.5 for any reference, plus a log-base-4 bonus capped at 1.0.
    return Math.min(1.0, 0.5 + (Math.log(refCount + 1) / Math.log(4)) * 0.5);
}
|
|
78
|
+
// ─── Factor: Task Proximity ───────────────────────────────────────────────────
|
|
79
|
+
/**
 * Task-proximity factor in [0, 1]: starts from a 0.3 baseline, boosted by
 * overlap with the active subtask and mentions of the detected tech stack,
 * penalized when the message only touches already-completed work.
 */
function taskProximityScore(message, sessionState) {
    const text = (0, utils_1.extractText)(message.original).toLowerCase();
    const { currentSubtask, technologyStack, completedSubtasks } = sessionState.taskContext;
    let proximity = 0.3; // neutral baseline
    // Boost for keyword overlap with the active subtask (capped at +0.4).
    if (currentSubtask) {
        const taskWords = (0, utils_1.extractKeywords)(currentSubtask);
        const bodyWords = (0, utils_1.extractKeywords)(text);
        const matches = taskWords.filter((word) => bodyWords.includes(word)).length;
        proximity += Math.min(0.4, matches * 0.15);
    }
    // Small boost when the message mentions any detected technology.
    if (technologyStack.some((tech) => text.includes(tech.toLowerCase()))) {
        proximity += 0.15;
    }
    // Small penalty when the message relates to already-finished subtasks.
    const touchesCompleted = completedSubtasks.some((task) =>
        (0, utils_1.extractKeywords)(task).some((word) => text.includes(word)));
    if (touchesCompleted) {
        proximity -= 0.10;
    }
    return Math.max(0, Math.min(1.0, proximity));
}
|
|
97
|
+
// ─── Tier Conversion ──────────────────────────────────────────────────────────
|
|
98
|
+
/** Buckets a continuous 0–1 relevance score into a discrete tier. */
function scoreTierFromScore(score) {
    const thresholds = [
        [0.85, 'CRITICAL'],
        [0.65, 'HIGH'],
        [0.40, 'MEDIUM'],
        [0.20, 'LOW'],
    ];
    for (const [cutoff, tier] of thresholds) {
        if (score >= cutoff) {
            return tier;
        }
    }
    return 'NEGLIGIBLE';
}
|
|
109
|
+
// ─── Composite Scorer ─────────────────────────────────────────────────────────
|
|
110
|
+
/**
 * Computes the weighted composite relevance score (0–1) for one message from
 * four factors: recency, back-references, task proximity, and type weight.
 * Pinned messages short-circuit to 1.0.
 *
 * @param message      The annotated message to score.
 * @param allMessages  Full ordered conversation (for recency/reference factors).
 * @param sessionState Extracted session state (for task proximity).
 * @returns Composite score in [0, 1].
 */
function scoreRelevance(message, allMessages, sessionState) {
    if (message.relevanceFactors.isPinned)
        return 1.0;
    const recency = recencyScore(message.originalIndex, allMessages.length, message.classification);
    const references = referenceScore(message, allMessages);
    const proximity = taskProximityScore(message, sessionState);
    // Guard: a classification missing from TYPE_WEIGHTS would look up
    // undefined and turn the whole composite into NaN (which then buckets as
    // NEGLIGIBLE downstream). Fall back to a neutral mid weight instead.
    const typeWeight = TYPE_WEIGHTS[message.classification] ?? 0.5;
    return (recency * WEIGHTS.recency +
        references * WEIGHTS.references +
        proximity * WEIGHTS.taskProximity +
        typeWeight * WEIGHTS.typeWeight);
}
|
|
122
|
+
// ─── Batch Scorer ─────────────────────────────────────────────────────────────
|
|
123
|
+
/**
 * Scores every message in place (relevanceScore + relevanceFactors) and
 * returns the same array.
 *
 * NOTE(review): countDirectReferences is invoked here AND inside
 * scoreRelevance (via referenceScore), so the O(n) later-message walk runs
 * twice per message — consider computing the count once if this shows up in
 * profiles on long conversations.
 */
function scoreAll(messages, sessionState) {
    for (const message of messages) {
        const score = scoreRelevance(message, messages, sessionState);
        const refCount = countDirectReferences(message, messages);
        message.relevanceScore = score;
        // Merge computed factors over any existing ones (e.g. isPinned set
        // earlier by the classifier).
        message.relevanceFactors = {
            ...message.relevanceFactors,
            recencyScore: recencyScore(message.originalIndex, messages.length, message.classification),
            referenceCount: refCount,
            taskProximity: taskProximityScore(message, sessionState),
        };
    }
    return messages;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// src/scorer/session-extractor.ts
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.extractSessionState = extractSessionState;
|
|
5
|
+
const utils_1 = require("../utils");
|
|
6
|
+
const tokenizer_1 = require("../tokenizer");
|
|
7
|
+
const utils_2 = require("../utils");
|
|
8
|
+
const patterns_1 = require("../classifier/patterns");
|
|
9
|
+
/**
 * Builds a SessionState snapshot from classified messages: task context
 * (goal, current/completed subtasks, constraints, tech stack) plus the token
 * budget used for utilization decisions.
 *
 * @param messages          Classified messages in original order.
 * @param model             Model identifier, used to look up the max token budget.
 * @param actualInputTokens Optional provider-reported input size; when given
 *                          it overrides the locally computed token sum.
 */
function extractSessionState(messages, model, actualInputTokens) {
    const taskDef = messages.find((m) => m.classification === 'TASK_DEFINITION');
    const allProgress = messages.filter((m) => m.classification === 'PROGRESS_MARKER');
    const lastProgress = allProgress[allProgress.length - 1];
    const lastUserMessage = [...messages].reverse().find((m) => m.original.role === 'user');
    // All progress markers except the most recent are treated as completed.
    const completedSubtasks = allProgress
        .slice(0, -1)
        .map((m) => (0, utils_1.extractText)(m.original));
    const activeConstraints = messages
        .filter((m) => m.classification === 'USER_CORRECTION' ||
        m.classification === 'SYSTEM_CONSTRAINT')
        .map((m) => (0, utils_1.extractText)(m.original));
    const allText = messages.map((m) => (0, utils_1.extractText)(m.original)).join(' ').toLowerCase();
    const technologyStack = detectTechStack(allText);
    const computedUsage = messages.reduce((sum, m) => sum + m.tokenCount, 0);
    const currentUsage = actualInputTokens ?? computedUsage;
    const modelMaxTokens = (0, tokenizer_1.getModelMaxTokens)(model);
    return {
        sessionId: (0, utils_2.generateId)(),
        createdAt: new Date(),
        lastUpdatedAt: new Date(),
        taskContext: {
            originalGoal: taskDef ? (0, utils_1.extractText)(taskDef.original) : null,
            // Prefer the latest progress marker; fall back to the latest user
            // message; null when neither exists.
            currentSubtask: lastProgress
                ? (0, utils_1.extractText)(lastProgress.original)
                : lastUserMessage
                    ? (0, utils_1.extractText)(lastUserMessage.original)
                    : null,
            completedSubtasks,
            activeConstraints,
            technologyStack,
        },
        tokenBudget: {
            modelMaxTokens,
            currentUsage,
            utilizationPercent: currentUsage / modelMaxTokens,
            warningThreshold: 0.65,
            criticalThreshold: 0.80,
        },
        messages: new Map(messages.map((m) => [m.id, m])),
        messageOrder: messages.map((m) => m.id),
        compressionHistory: [],
    };
}
|
|
53
|
+
/**
 * Returns the names of every technology whose signal regex matches the text.
 * NOTE(review): if any TECH_STACK_SIGNALS pattern carries the /g flag,
 * RegExp.prototype.test becomes stateful via lastIndex — confirm the
 * patterns are flag-free.
 */
function detectTechStack(text) {
    const detected = [];
    for (const [name, pattern] of Object.entries(patterns_1.TECH_STACK_SIGNALS)) {
        if (pattern.test(text)) {
            detected.push(name);
        }
    }
    return detected;
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AnnotatedMessage, CompressionStrategy } from '../types';
/** Picks a compression strategy for one message from the decision table. */
export declare function selectStrategy(message: AnnotatedMessage, utilizationPercent: number): CompressionStrategy;
/** Assigns a strategy to every message, mutating in place; returns the array. */
export declare function selectAllStrategies(messages: AnnotatedMessage[], utilizationPercent: number): AnnotatedMessage[];
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// src/strategy/selector.ts
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.selectStrategy = selectStrategy;
|
|
5
|
+
exports.selectAllStrategies = selectAllStrategies;
|
|
6
|
+
const confidence_1 = require("../classifier/confidence");
|
|
7
|
+
const scorer_1 = require("../scorer");
|
|
8
|
+
const classifier_1 = require("../classifier");
|
|
9
|
+
// ─── Decision Table ───────────────────────────────────────────────────────────
|
|
10
|
+
/** Composite lookup key: classification :: relevance tier :: confidence tier. */
function key(c, r, conf) {
    return [c, r, conf].join('::');
}
|
|
13
|
+
// Decision table: (classification, relevance tier, confidence tier) → strategy.
// Combinations absent from this table fall back to 'PRESERVE' via the
// `?? 'PRESERVE'` lookup in selectStrategy, so omissions are safe-by-default.
const STRATEGY_TABLE = {
    // SYSTEM_CONSTRAINT — always preserve
    [key('SYSTEM_CONSTRAINT', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('SYSTEM_CONSTRAINT', 'CRITICAL', 'MEDIUM')]: 'PRESERVE',
    [key('SYSTEM_CONSTRAINT', 'CRITICAL', 'LOW')]: 'PRESERVE',
    // USER_CORRECTION — never touch
    [key('USER_CORRECTION', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('USER_CORRECTION', 'CRITICAL', 'MEDIUM')]: 'PRESERVE',
    [key('USER_CORRECTION', 'CRITICAL', 'LOW')]: 'PRESERVE',
    // TASK_DEFINITION — preserve
    [key('TASK_DEFINITION', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('TASK_DEFINITION', 'CRITICAL', 'MEDIUM')]: 'PRESERVE',
    [key('TASK_DEFINITION', 'CRITICAL', 'LOW')]: 'PRESERVE',
    // ERROR_ACTIVE — always preserve (current problem)
    [key('ERROR_ACTIVE', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('ERROR_ACTIVE', 'CRITICAL', 'MEDIUM')]: 'PRESERVE',
    [key('ERROR_ACTIVE', 'CRITICAL', 'LOW')]: 'PRESERVE',
    // ERROR_RESOLVED — goldmine for savings
    [key('ERROR_RESOLVED', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('ERROR_RESOLVED', 'HIGH', 'HIGH')]: 'EXTRACT_RESULT',
    [key('ERROR_RESOLVED', 'HIGH', 'MEDIUM')]: 'EXTRACT_RESULT',
    [key('ERROR_RESOLVED', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('ERROR_RESOLVED', 'MEDIUM', 'HIGH')]: 'EXTRACT_RESULT',
    [key('ERROR_RESOLVED', 'MEDIUM', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('ERROR_RESOLVED', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('ERROR_RESOLVED', 'LOW', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('ERROR_RESOLVED', 'LOW', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('ERROR_RESOLVED', 'LOW', 'LOW')]: 'PRESERVE',
    [key('ERROR_RESOLVED', 'NEGLIGIBLE', 'HIGH')]: 'DROP',
    [key('ERROR_RESOLVED', 'NEGLIGIBLE', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('ERROR_RESOLVED', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // TOOL_OUTPUT_STALE — second biggest savings source
    [key('TOOL_OUTPUT_STALE', 'HIGH', 'HIGH')]: 'EXTRACT_RESULT',
    [key('TOOL_OUTPUT_STALE', 'HIGH', 'MEDIUM')]: 'EXTRACT_RESULT',
    [key('TOOL_OUTPUT_STALE', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_STALE', 'MEDIUM', 'HIGH')]: 'EXTRACT_RESULT',
    [key('TOOL_OUTPUT_STALE', 'MEDIUM', 'MEDIUM')]: 'SUMMARIZE',
    [key('TOOL_OUTPUT_STALE', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_STALE', 'LOW', 'HIGH')]: 'SUMMARIZE',
    [key('TOOL_OUTPUT_STALE', 'LOW', 'MEDIUM')]: 'DEDUPLICATE',
    [key('TOOL_OUTPUT_STALE', 'LOW', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_STALE', 'NEGLIGIBLE', 'HIGH')]: 'DROP',
    [key('TOOL_OUTPUT_STALE', 'NEGLIGIBLE', 'MEDIUM')]: 'SUMMARIZE',
    [key('TOOL_OUTPUT_STALE', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // TOOL_OUTPUT_ACTIVE — light touch only
    [key('TOOL_OUTPUT_ACTIVE', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'HIGH', 'HIGH')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'HIGH', 'MEDIUM')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'MEDIUM', 'HIGH')]: 'EXTRACT_RESULT',
    [key('TOOL_OUTPUT_ACTIVE', 'MEDIUM', 'MEDIUM')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'LOW', 'HIGH')]: 'EXTRACT_RESULT',
    [key('TOOL_OUTPUT_ACTIVE', 'LOW', 'MEDIUM')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'LOW', 'LOW')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'NEGLIGIBLE', 'HIGH')]: 'SUMMARIZE',
    [key('TOOL_OUTPUT_ACTIVE', 'NEGLIGIBLE', 'MEDIUM')]: 'PRESERVE',
    [key('TOOL_OUTPUT_ACTIVE', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // REASONING_INTERMEDIATE
    [key('REASONING_INTERMEDIATE', 'HIGH', 'HIGH')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'HIGH', 'MEDIUM')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'MEDIUM', 'HIGH')]: 'SUMMARIZE',
    [key('REASONING_INTERMEDIATE', 'MEDIUM', 'MEDIUM')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'LOW', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('REASONING_INTERMEDIATE', 'LOW', 'MEDIUM')]: 'SUMMARIZE',
    [key('REASONING_INTERMEDIATE', 'LOW', 'LOW')]: 'PRESERVE',
    [key('REASONING_INTERMEDIATE', 'NEGLIGIBLE', 'HIGH')]: 'DROP',
    [key('REASONING_INTERMEDIATE', 'NEGLIGIBLE', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('REASONING_INTERMEDIATE', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // DECISION_FINAL
    [key('DECISION_FINAL', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('DECISION_FINAL', 'HIGH', 'HIGH')]: 'PRESERVE',
    [key('DECISION_FINAL', 'HIGH', 'MEDIUM')]: 'PRESERVE',
    [key('DECISION_FINAL', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('DECISION_FINAL', 'MEDIUM', 'HIGH')]: 'EXTRACT_RESULT',
    [key('DECISION_FINAL', 'MEDIUM', 'MEDIUM')]: 'PRESERVE',
    [key('DECISION_FINAL', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('DECISION_FINAL', 'LOW', 'HIGH')]: 'SUMMARIZE',
    [key('DECISION_FINAL', 'LOW', 'MEDIUM')]: 'PRESERVE',
    [key('DECISION_FINAL', 'LOW', 'LOW')]: 'PRESERVE',
    [key('DECISION_FINAL', 'NEGLIGIBLE', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('DECISION_FINAL', 'NEGLIGIBLE', 'MEDIUM')]: 'PRESERVE',
    [key('DECISION_FINAL', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // PROGRESS_MARKER
    [key('PROGRESS_MARKER', 'HIGH', 'HIGH')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'HIGH', 'MEDIUM')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'MEDIUM', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('PROGRESS_MARKER', 'MEDIUM', 'MEDIUM')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'LOW', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('PROGRESS_MARKER', 'LOW', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('PROGRESS_MARKER', 'LOW', 'LOW')]: 'PRESERVE',
    [key('PROGRESS_MARKER', 'NEGLIGIBLE', 'HIGH')]: 'DROP',
    [key('PROGRESS_MARKER', 'NEGLIGIBLE', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('PROGRESS_MARKER', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
    // CONVERSATIONAL — preserve recent turns; collapse only truly stale ones
    [key('CONVERSATIONAL', 'CRITICAL', 'HIGH')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'HIGH', 'HIGH')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'HIGH', 'MEDIUM')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'HIGH', 'LOW')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'MEDIUM', 'HIGH')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'MEDIUM', 'MEDIUM')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'MEDIUM', 'LOW')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'LOW', 'HIGH')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'LOW', 'MEDIUM')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'LOW', 'LOW')]: 'PRESERVE',
    [key('CONVERSATIONAL', 'NEGLIGIBLE', 'HIGH')]: 'COLLAPSE_TO_MARKER',
    [key('CONVERSATIONAL', 'NEGLIGIBLE', 'MEDIUM')]: 'COLLAPSE_TO_MARKER',
    [key('CONVERSATIONAL', 'NEGLIGIBLE', 'LOW')]: 'PRESERVE',
};
|
|
126
|
+
// ─── Escalation at Critical Threshold ────────────────────────────────────────
|
|
127
|
+
/**
 * At critical utilization, steps a strategy up one level of aggression.
 * Low-confidence classifications are never escalated; strategies with no
 * more aggressive form (e.g. DROP) are returned unchanged.
 */
function escalate(strategy, confidenceTier) {
    if (confidenceTier === 'LOW') {
        return strategy;
    }
    switch (strategy) {
        case 'PRESERVE': return 'SUMMARIZE';
        case 'SUMMARIZE': return 'EXTRACT_RESULT';
        case 'EXTRACT_RESULT': return 'COLLAPSE_TO_MARKER';
        case 'COLLAPSE_TO_MARKER': return 'DROP';
        default: return strategy;
    }
}
|
|
138
|
+
// ─── Selector ─────────────────────────────────────────────────────────────────
|
|
139
|
+
/**
 * Picks a compression strategy for one message by looking up
 * (classification, relevance tier, confidence tier) in STRATEGY_TABLE.
 * Unlisted combinations default to PRESERVE; at >= 80% utilization the
 * chosen strategy is escalated one level (unless confidence is LOW).
 */
function selectStrategy(message, utilizationPercent) {
    // Pinned types are NEVER compressed or escalated, regardless of utilization
    if ((0, classifier_1.isPinned)(message.classification))
        return 'PRESERVE';
    const confidenceTier = (0, confidence_1.getConfidenceTier)(message.classificationConfidence);
    const relevanceTier = (0, scorer_1.scoreTierFromScore)(message.relevanceScore);
    const tableKey = key(message.classification, relevanceTier, confidenceTier);
    // Safe default: anything not covered by the table is preserved.
    const base = STRATEGY_TABLE[tableKey] ?? 'PRESERVE';
    if (utilizationPercent >= 0.80) {
        return escalate(base, confidenceTier);
    }
    return base;
}
|
|
152
|
+
// ─── Batch ────────────────────────────────────────────────────────────────────
|
|
153
|
+
/** Assigns a strategy to every message, mutating in place; returns the array. */
function selectAllStrategies(messages, utilizationPercent) {
    messages.forEach((message) => {
        message.compressionStrategy = selectStrategy(message, utilizationPercent);
    });
    return messages;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Exact token count using model-appropriate tokenizer.
 * Falls back to character estimation if tokenizer unavailable.
 */
export declare function countTokens(text: string, model: string): Promise<number>;
/**
 * Fast approximate count — used only for UI/real-time displays.
 * Never use this for compression decisions.
 */
export declare function estimate(text: string): number;
/**
 * Count tokens across an entire messages array.
 */
export declare function countMessagesTokens(messages: Array<{
    role: string;
    content: string | any[];
}>, model: string): Promise<number>;
/**
 * Token budget for the given model identifier — used as the denominator when
 * computing context-utilization percentages.
 */
export declare function getModelMaxTokens(model: string): number;
|