runcap 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -15
- package/bin/runcap.mjs +90 -6
- package/package.json +3 -3
- package/src/alerts.mjs +145 -0
- package/src/cloud.mjs +90 -0
- package/src/compressor.mjs +169 -0
- package/src/mission-control.mjs +496 -81
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
// Runcap token compressor — pure Node, no ML, no native deps.
|
|
2
|
+
//
|
|
3
|
+
// Headroom (the popular Python tool) proves the demand but pays for it with
|
|
4
|
+
// onnxruntime/HF model weights that break installs on macOS Intel, Windows MSVC,
|
|
5
|
+
// etc. Runcap takes the opposite bet: only the deterministic, lossless-by-construction
|
|
6
|
+
// reductions that need zero dependencies and can never silently change an answer.
|
|
7
|
+
//
|
|
8
|
+
// What we compress (and why it is safe):
|
|
9
|
+
// - JSON whitespace inside string-embedded JSON blobs (re-serialize compact).
|
|
10
|
+
// - Repeated blank lines and trailing whitespace in long text blocks.
|
|
11
|
+
// - Long log / stack-trace runs collapsed to head + tail + "(N lines elided)".
|
|
12
|
+
// What we never touch:
|
|
13
|
+
// - The user's actual prose instructions.
|
|
14
|
+
// - Code semantics (we only strip trailing whitespace, never tokens).
|
|
15
|
+
// - Anything under a conservative size threshold (compression has overhead).
|
|
16
|
+
//
|
|
17
|
+
// Every reduction is COUNTED so the gateway can show one honest number:
|
|
18
|
+
// "X tokens saved by compression". Token counts are an estimate (~4 chars/token),
|
|
19
|
+
// labeled `estimated`, never claimed as provider-exact.
|
|
20
|
+
|
|
21
|
+
/** Characters assumed per token; every token figure derived from it is an estimate. */
const CHARS_PER_TOKEN = 4;

/** String fields shorter than this many characters are returned unchanged. */
const MIN_FIELD_CHARS = 200;

/** Leading lines kept when a log-like block is collapsed. */
const LOG_HEAD_LINES = 12;

/** Trailing lines kept when a log-like block is collapsed. */
const LOG_TAIL_LINES = 8;

/** A block needs more lines than this before collapsing is considered. */
const LOG_COLLAPSE_THRESHOLD = 40;
|
|
26
|
+
|
|
27
|
+
/**
 * Estimate how many tokens a piece of text occupies.
 *
 * The figure is a heuristic: the string length divided by CHARS_PER_TOKEN,
 * rounded up. It is not a provider-exact tokenizer count.
 *
 * @param {*} text - Value to measure; non-string values are stringified first.
 * @returns {number} Estimated token count; 0 for null, undefined or "".
 */
export function estimateTokens(text) {
  // Test for "no text" explicitly. A plain falsy check would also report
  // 0 tokens for values such as the number 0, whose string form is not empty.
  if (text == null || text === "") return 0;
  return Math.ceil(String(text).length / CHARS_PER_TOKEN);
}
|
|
31
|
+
|
|
32
|
+
// Matches either one complete JSON string literal or one run of JSON
// whitespace. Only meaningful on text that JSON.parse has already accepted.
const JSON_STRING_OR_WHITESPACE_RE = /"[^"\\]*(?:\\.[^"\\]*)*"|[ \t\n\r]+/g;

// Longest text prefix (in characters) allowed in front of an embedded JSON blob.
const MAX_JSON_PREFIX_CHARS = 200;

// Remove insignificant whitespace from validated JSON text, leaving every
// token (numbers, escapes, key order, duplicate keys) exactly as written.
function minifyJsonText(json) {
  return json.replace(JSON_STRING_OR_WHITESPACE_RE, (match) => (match[0] === '"' ? match : ""));
}

// Return the minified form of `candidate` if it is valid JSON, otherwise null.
function tryMinifyJson(candidate) {
  try {
    // Validation only. The parsed value is deliberately discarded: a
    // parse/stringify round trip rewrites numbers (integers beyond 2^53 lose
    // digits, 1.50 becomes 1.5) and reorders integer-like keys.
    JSON.parse(candidate);
  } catch {
    // Not JSON (or too deeply nested to parse): nothing safe to do here.
    return null;
  }
  return minifyJsonText(candidate);
}

/**
 * Compact a JSON blob embedded in a string field without changing its content.
 *
 * Two shapes are handled:
 *   1. The whole field is JSON ("{...}" or "[...]").
 *   2. A short text prefix followed by a JSON blob ("Here is the data:\n{...}").
 *      Only the JSON tail is touched; the prefix is kept verbatim.
 *
 * @param {string} value - Field text to inspect.
 * @returns {string|null} The shorter text, or null when no valid JSON was
 *   found or compaction would not make the field smaller.
 */
function compactEmbeddedJson(value) {
  // Case 1: the entire field (ignoring surrounding whitespace) is JSON.
  const trimmed = value.trim();
  if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
    const compact = tryMinifyJson(trimmed);
    if (compact !== null && compact.length < value.length) return compact;
  }

  // Case 2: a prefix followed by JSON. Start at the first "{" or "[" and
  // accept only if everything from there to the end is valid JSON.
  const idx = value.search(/[{[]/);
  if (idx <= 0) return null;

  const prefix = value.slice(0, idx);
  // Keep the prefix small and prose-like; don't swallow huge text blocks.
  if (prefix.length > MAX_JSON_PREFIX_CHARS) return null;

  const compactTail = tryMinifyJson(value.slice(idx).trim());
  if (compactTail === null) return null;

  const rebuilt = prefix + compactTail;
  return rebuilt.length < value.length ? rebuilt : null;
}
|
|
67
|
+
|
|
68
|
+
// A line counts as log-like when it starts (after optional indentation) with
// an ISO-style date, a clock time, a log level, a JS "at frame" stack line or
// a Python `File "..."` traceback line.
const LOG_LINE_RE = /^\s*(\d{4}-\d{2}-\d{2}[T ]|\[?\d{2}:\d{2}:\d{2}|DEBUG|INFO|WARN|ERROR|TRACE|at\s+\w|\s+File ")/;

/**
 * Collapse a long, log-like block to its first and last lines, replacing the
 * middle with a single marker line that states how many lines were dropped.
 *
 * Unlike the other reducers this one discards content: every line between
 * the kept head and tail is removed, whether or not that individual line
 * looked like a log line.
 *
 * @param {string} value - Text to inspect.
 * @param {object} [options] - Optional overrides for the module defaults.
 * @param {number} [options.headLines=LOG_HEAD_LINES] - Leading lines to keep.
 * @param {number} [options.tailLines=LOG_TAIL_LINES] - Trailing lines to keep.
 * @param {number} [options.threshold=LOG_COLLAPSE_THRESHOLD] - The block must
 *   have more lines than this to be considered.
 * @returns {string|null} The collapsed text, or null when the block is too
 *   short, is not mostly log-like, or nothing would be elided.
 */
function collapseLogBlock(value, options = {}) {
  const {
    headLines = LOG_HEAD_LINES,
    tailLines = LOG_TAIL_LINES,
    threshold = LOG_COLLAPSE_THRESHOLD
  } = options;

  const lines = value.split("\n");
  if (lines.length <= threshold) return null;

  // Only collapse if at least half the lines look like logs/stack traces.
  const logLikeCount = lines.filter((line) => LOG_LINE_RE.test(line)).length;
  if (logLikeCount < lines.length * 0.5) return null;

  const head = lines.slice(0, Math.max(0, headLines));
  // slice(-0) is slice(0) and would return every line, so a zero-sized tail
  // has to be handled explicitly.
  const tail = tailLines > 0 ? lines.slice(-tailLines) : [];

  const elided = lines.length - head.length - tail.length;
  if (elided <= 0) return null;

  return [...head, `... (${elided} repetitive log lines elided by Runcap) ...`, ...tail].join("\n");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Strip trailing spaces/tabs from every line and collapse any run of two or
 * more blank lines into a single blank line.
 *
 * Text with fewer than three lines is treated as prose and left alone, so a
 * single-line instruction (including its trailing space) is never altered.
 * Both LF and CRLF line endings are handled, and the existing line-ending
 * style is kept.
 *
 * @param {string} value - Text to squeeze.
 * @returns {string|null} The shorter text, or null if nothing was removed.
 */
function squeezeWhitespace(value) {
  const lines = value.split("\n");
  if (lines.length < 3) return null; // not a structural block; leave prose alone

  const squeezed = lines
    // After splitting on "\n" a CRLF line still ends in "\r"; strip the
    // spaces/tabs in front of it and put the "\r" back.
    .map((line) => line.replace(/[ \t]+(\r?)$/, "$1"))
    .join("\n")
    // Three or more consecutive line breaks means two or more blank lines;
    // keep exactly one, reusing the line-ending style of the run.
    .replace(/(\r?\n){3,}/g, "$1$1");

  return squeezed.length < value.length ? squeezed : null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Run one string field through the reducers in a fixed order: embedded-JSON
 * compaction, then log collapsing, then whitespace squeezing. Each step works
 * on the output of the previous one and is kept only if it shortens the text.
 *
 * @param {*} value - Field value; anything that is not a string of at least
 *   MIN_FIELD_CHARS characters is returned unchanged.
 * @returns {*} The reduced string, or the original value if nothing helped.
 */
function compressField(value) {
  const eligible = typeof value === "string" && value.length >= MIN_FIELD_CHARS;
  if (!eligible) return value;

  // compactEmbeddedJson only reports a result when that result is shorter.
  const compacted = compactEmbeddedJson(value);
  let best = compacted === null ? value : compacted;

  for (const reduce of [collapseLogBlock, squeezeWhitespace]) {
    const candidate = reduce(best);
    if (candidate !== null && candidate.length < best.length) {
      best = candidate;
    }
  }
  return best;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Compress the text content of a chat-style request body without mutating it.
 *
 * Fields visited:
 *   - `messages[].content` (string, or array of parts with a string `text`)
 *   - `system` (string, or array of parts with a string `text`)
 *   - `input` (string only)
 *
 * Sizes are measured on the JSON-serialized body, and `savedTokens` is an
 * estimate derived from CHARS_PER_TOKEN.
 *
 * @param {*} body - Parsed request body.
 * @returns {{body: *, before: number, after: number, savedChars: number,
 *   savedTokens: number, touched: number}} The (possibly copied) body plus
 *   counters; `touched` is the number of text fields that changed. Non-object
 *   input is returned as-is with all counters at 0.
 */
export function compressRequestBody(body) {
  if (!body || typeof body !== "object") {
    return { body, savedChars: 0, savedTokens: 0, touched: 0, before: 0, after: 0 };
  }

  const before = JSON.stringify(body).length;
  let touched = 0;

  // Compress one text value and record whether it actually changed.
  const shrinkText = (text) => {
    const smaller = compressField(text);
    if (smaller !== text) touched += 1;
    return smaller;
  };

  // Content is either a plain string or an array of parts; anything else
  // passes through untouched.
  const shrinkContent = (content) => {
    if (typeof content === "string") return shrinkText(content);
    if (!Array.isArray(content)) return content;
    return content.map((part) => {
      const hasText = part && typeof part === "object" && typeof part.text === "string";
      if (!hasText) return part;
      const text = shrinkText(part.text);
      return { ...part, text };
    });
  };

  let next = body;

  // OpenAI chat.completions: messages[].content
  if (Array.isArray(body.messages)) {
    const messages = body.messages.map((message) => {
      const hasContent = message && typeof message === "object" && "content" in message;
      return hasContent ? { ...message, content: shrinkContent(message.content) } : message;
    });
    next = { ...body, messages };
  }

  // Anthropic system prompt (string or block array)
  if (next.system !== undefined) {
    next = { ...next, system: shrinkContent(next.system) };
  }

  // OpenAI responses API / raw input
  if (typeof next.input === "string") {
    next = { ...next, input: shrinkContent(next.input) };
  }

  const after = JSON.stringify(next).length;
  const savedChars = Math.max(0, before - after);

  return {
    body: next,
    before,
    after,
    savedChars,
    savedTokens: Math.round(savedChars / CHARS_PER_TOKEN),
    touched
  };
}
|