openhermes 4.9.2 → 4.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/CONTEXT.md +1 -1
  2. package/README.md +32 -31
  3. package/bootstrap.ts +262 -45
  4. package/harness/agents/oh-planner.md +1 -1
  5. package/harness/agents/openhermes.md +27 -126
  6. package/harness/codex/AUTOPILOT.md +99 -3
  7. package/harness/codex/CHARTER.md +3 -4
  8. package/harness/lib/background/background.test.ts +197 -0
  9. package/harness/lib/background/index.ts +7 -0
  10. package/harness/lib/background/interfaces.ts +31 -0
  11. package/harness/lib/background/manager.ts +320 -0
  12. package/harness/lib/composer/compose.test.ts +168 -0
  13. package/harness/lib/composer/compose.ts +65 -0
  14. package/harness/lib/composer/fragments/01-identity.md +1 -0
  15. package/harness/lib/composer/fragments/02-delegation.md +6 -0
  16. package/harness/lib/composer/fragments/03-permissions.md +13 -0
  17. package/harness/lib/composer/fragments/04-task-flow.md +15 -0
  18. package/harness/lib/composer/fragments/05-confidence.md +5 -0
  19. package/harness/lib/composer/fragments/06-parallelization.md +17 -0
  20. package/harness/lib/composer/fragments/07-shell.md +41 -0
  21. package/harness/lib/composer/fragments/08-routing.md +8 -0
  22. package/harness/lib/composer/fragments/09-guardrails.md +12 -0
  23. package/harness/lib/composer/index.ts +1 -0
  24. package/harness/lib/hooks/builtins/confidence-gate-hook.ts +70 -0
  25. package/harness/lib/hooks/builtins/delegation-depth-hook.ts +59 -0
  26. package/harness/lib/hooks/builtins/error-recovery-hook.ts +107 -0
  27. package/harness/lib/hooks/builtins/memory-sync-hook.ts +73 -0
  28. package/harness/lib/hooks/builtins/plan-check-hook.ts +43 -0
  29. package/harness/lib/hooks/builtins/route-tracking-hook.ts +147 -0
  30. package/harness/lib/hooks/builtins/sanity-check-hook.ts +52 -0
  31. package/harness/lib/hooks/builtins/shell-detect-hook.ts +96 -0
  32. package/harness/lib/hooks/hooks.test.ts +1016 -0
  33. package/harness/lib/hooks/index.ts +30 -0
  34. package/harness/lib/hooks/registry.ts +416 -0
  35. package/harness/lib/hooks/types.ts +71 -0
  36. package/harness/lib/memory/index.ts +18 -0
  37. package/harness/lib/memory/interfaces.ts +53 -0
  38. package/harness/lib/memory/memory-manager.ts +205 -0
  39. package/harness/lib/memory/memory.test.ts +491 -0
  40. package/harness/lib/memory/plan-store.ts +366 -0
  41. package/harness/lib/recovery/handler.ts +243 -0
  42. package/harness/lib/recovery/index.ts +14 -0
  43. package/harness/lib/recovery/interfaces.ts +48 -0
  44. package/harness/lib/recovery/patterns.ts +149 -0
  45. package/harness/lib/recovery/recovery.test.ts +312 -0
  46. package/harness/lib/sanity/anomaly-tracker.ts +127 -0
  47. package/harness/lib/sanity/checker.ts +178 -0
  48. package/harness/lib/sanity/index.ts +13 -0
  49. package/harness/lib/sanity/interfaces.ts +24 -0
  50. package/harness/lib/sanity/sanity.test.ts +472 -0
  51. package/harness/lib/sync/file-watcher.ts +174 -0
  52. package/harness/lib/sync/index.ts +11 -0
  53. package/harness/lib/sync/interfaces.ts +27 -0
  54. package/harness/lib/sync/plan-sync.ts +536 -0
  55. package/harness/lib/sync/sync.test.ts +832 -0
  56. package/harness/skills/oh-init/DEEP.md +2 -2
  57. package/harness/skills/oh-manifest/SKILL.md +1 -1
  58. package/harness/skills/oh-plan-review/DEEP.md +1 -1
  59. package/harness/skills/oh-planner/DEEP.md +3 -3
  60. package/harness/skills/oh-ship/SKILL.md +1 -1
  61. package/harness/skills/oh-skill-craft/SKILL.md +1 -4
  62. package/package.json +5 -5
  63. package/tsconfig.json +1 -1
  64. package/harness/commands/oh-doctor.md +0 -205
  65. package/harness/commands/oh-log.md +0 -18
  66. package/harness/skills/oh-learn/DEEP.md +0 -44
  67. package/harness/skills/oh-learn/SKILL.md +0 -30
  68. package/scripts/count-tokens.mjs +0 -158
  69. package/scripts/oh-doctor.ps1 +0 -342
@@ -0,0 +1,178 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Output Sanity Checker — detect LLM output degeneration patterns
3
+ // ---------------------------------------------------------------------------
4
+
5
+ import type { SanityResult } from "./interfaces.ts";
6
+ import { AnomalyTracker } from "./anomaly-tracker.ts";
7
+
/**
 * Upper bound on the length used as the denominator of the character
 * diversity ratio.
 *
 * The number of distinct characters in healthy text plateaus (alphabets are
 * finite) while the text keeps growing, so an uncapped `unique / length`
 * ratio drops below any fixed threshold once the output is long enough.
 * Capping the denominator keeps the check meaningful for long outputs while
 * leaving results for outputs of up to this many characters unchanged.
 */
const DIVERSITY_LENGTH_CAP = 1000;

/** Outputs whose (length-capped) diversity ratio is below this are flagged. */
const MIN_DIVERSITY_RATIO = 0.02;

/** Very short outputs that are accepted as legitimate status replies. */
const MINIMAL_STATUS_WORDS: readonly string[] = [
  "ok",
  "done",
  "yes",
  "no",
  "passed",
  "failed",
  "error",
  "null",
  "undefined",
  "true",
  "false",
];

/**
 * Check a text string for output degeneration patterns.
 *
 * Returns an unhealthy result (severity + reason + patternName) for the first
 * matching pattern, or `{ isHealthy: true, severity: "ok" }` if none matches.
 *
 * Check ordering: input validation first (non-string → critical, empty
 * string → warning), then all critical-severity pattern checks (most specific
 * first), then warning-severity checks, so severe issues are reported before
 * mild ones.
 *
 * @param text - Candidate output. Anything that is not a string is reported
 *   as a critical `empty_output`.
 * @param anomalyTracker - Optional tracker used for the cross-invocation
 *   duplicate check. It is only consulted (via `trackOutput`) when every other
 *   check has passed, so outputs rejected earlier are never handed to it.
 * @returns The sanity verdict for `text`.
 */
export function checkOutputSanity(
  text: unknown,
  anomalyTracker?: AnomalyTracker,
): SanityResult {
  if (typeof text !== "string") {
    return {
      isHealthy: false,
      severity: "critical",
      reason: "Output is not a string (possibly undefined/null)",
      patternName: "empty_output",
    };
  }
  if (text.length === 0) {
    return {
      isHealthy: false,
      severity: "warning",
      reason: "Output is an empty string",
      patternName: "empty_output",
    };
  }

  // ═══════════════════════════════════════════════════════════════════
  // CRITICAL checks — severe degeneration
  // ═══════════════════════════════════════════════════════════════════

  // ── 1. Single character repetition ──────────────────────────────
  // 16+ consecutive identical characters. `.` does not match line
  // terminators, so runs of blank lines are not caught by this check.
  const singleCharMatch = text.match(/(.)\1{15,}/);
  if (singleCharMatch) {
    return {
      isHealthy: false,
      severity: "critical",
      reason: `Single character repetition detected: "${singleCharMatch[0].slice(0, 20)}..."`,
      patternName: "single_char_repetition",
    };
  }

  // ── 2. Short pattern loop ───────────────────────────────────────
  // 9+ back-to-back repetitions of a 2-6 character sequence
  const patternLoopMatch = text.match(/(.{2,6})\1{8,}/);
  if (patternLoopMatch) {
    return {
      isHealthy: false,
      severity: "critical",
      reason: `Pattern loop detected: "${patternLoopMatch[0].slice(0, 30)}..."`,
      patternName: "pattern_loop",
    };
  }

  // ── 3. Excessive box/block drawing characters ───────────────────
  // Unicode box drawing, block elements, and Braille patterns. Both an
  // absolute count (> 100) and a share of the output (> 30%) are required,
  // so a small diagram inside prose is not flagged.
  const boxDrawChars = text.match(/[\u2500-\u257f\u2580-\u259f\u2800-\u28ff]/g);
  if (boxDrawChars && boxDrawChars.length > 100) {
    const ratio = boxDrawChars.length / text.length;
    if (ratio > 0.3) {
      return {
        isHealthy: false,
        severity: "critical",
        reason: `Visual gibberish detected: ${boxDrawChars.length} box/block chars (${(ratio * 100).toFixed(1)}% of output)`,
        patternName: "visual_gibberish",
      };
    }
  }

  // ── 4. CJK character spam ─────────────────────────────────────
  // Lots of CJK characters (> 200) drawn from fewer than 10 distinct ones
  const cjkChars = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g);
  if (cjkChars && cjkChars.length > 200) {
    const uniqueCjk = new Set(cjkChars).size;
    if (uniqueCjk < 10 && cjkChars.length / uniqueCjk > 20) {
      return {
        isHealthy: false,
        severity: "critical",
        reason: `CJK character spam detected: ${cjkChars.length} chars, ${uniqueCjk} unique (ratio ${(cjkChars.length / uniqueCjk).toFixed(0)})`,
        patternName: "cjk_spam",
      };
    }
  }

  // ── 5. Low character diversity ────────────────────────────────
  // General catch-all for text with very few distinct characters.
  if (text.length > 200) {
    const cleanText = text.replace(/\s/g, "");
    if (cleanText.length > 0) {
      const uniqueChars = new Set(cleanText).size;
      // Normalise against at most DIVERSITY_LENGTH_CAP characters: dividing by
      // the full length would flag every long, perfectly healthy output,
      // because the set of distinct characters stops growing with length.
      const diversity = uniqueChars / Math.min(cleanText.length, DIVERSITY_LENGTH_CAP);
      if (diversity < MIN_DIVERSITY_RATIO) {
        return {
          isHealthy: false,
          severity: "critical",
          reason: `Low information density: ${uniqueChars} unique chars out of ${cleanText.length} (ratio ${diversity.toFixed(4)} < ${MIN_DIVERSITY_RATIO})`,
          patternName: "low_diversity",
        };
      }
    }
  }

  // ═══════════════════════════════════════════════════════════════════
  // WARNING checks — mild or context-dependent issues
  // ═══════════════════════════════════════════════════════════════════

  // Split once; both line-based checks below work on the same lines.
  const allLines = text.split(/\r?\n/);

  // ── 6. Excessive JSON/error stack lines ─────────────────────────
  const errorStackLines = allLines.filter(
    (l) => l.includes("Error:") || l.trim().startsWith("at ") || l.includes("Exception:"),
  );
  if (errorStackLines.length > 5) {
    return {
      isHealthy: false,
      severity: "warning",
      reason: `Error stack bleed detected: ${errorStackLines.length} error/stack lines`,
      patternName: "error_stack_bleed",
    };
  }

  // ── 7. Line-by-line repetition ──────────────────────────────────
  // Only lines with more than 10 non-padding characters are considered, and
  // only when there are more than 10 of them.
  const lines = allLines.filter((l) => l.trim().length > 10);
  if (lines.length > 10) {
    const uniqueLines = new Set(lines);
    if (uniqueLines.size < lines.length * 0.2) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Excessive line repetition: ${uniqueLines.size} unique lines out of ${lines.length} (${(uniqueLines.size / lines.length * 100).toFixed(0)}% unique)`,
        patternName: "line_repetition",
      };
    }
  }

  // ── 8. Empty/tiny output ────────────────────────────────────────
  // Only flag if the entire output is small enough to be suspicious
  // (exclude common status messages like "ok", "done" and plain numbers).
  // The empty string was already handled at the top of the function.
  if (text.length < 50) {
    const trimmed = text.trim().toLowerCase();
    if (!MINIMAL_STATUS_WORDS.includes(trimmed) && !/^[\d.]+$/.test(trimmed)) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Output too short: ${text.length} characters`,
        patternName: "output_too_short",
      };
    }
  }

  // ── 9. Cross-invocation dedup check ────────────────────────────────
  if (anomalyTracker) {
    const isRepeated = anomalyTracker.trackOutput(text);
    if (isRepeated) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Output identical to previous ${anomalyTracker.MAX_IDENTICAL_OUTPUTS} invocations`,
        patternName: "repeated_identical_output",
      };
    }
  }

  // No pattern matched — healthy
  return { isHealthy: true, severity: "ok" };
}
@@ -0,0 +1,13 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Sanity Checker module — barrel export
3
+ // ---------------------------------------------------------------------------
4
+
5
+ export type {
6
+ Severity,
7
+ SanityResult,
8
+ AnomalyRecord,
9
+ AnomalyTrackerConfig,
10
+ } from "./interfaces.ts";
11
+
12
+ export { checkOutputSanity } from "./checker.ts";
13
+ export { AnomalyTracker } from "./anomaly-tracker.ts";
@@ -0,0 +1,24 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Sanity Checker — type definitions
3
+ // ---------------------------------------------------------------------------
4
+
/**
 * Outcome level of a sanity check: "ok" accompanies healthy results, while
 * "warning" and "critical" accompany detected problems.
 */
export type Severity = "ok" | "warning" | "critical";
6
+
/** Verdict produced by a single output sanity check. */
export interface SanityResult {
  /** True when no degeneration pattern matched. */
  isHealthy: boolean;
  /** "ok" on healthy results; "warning" or "critical" when a check fired. */
  severity: Severity;
  /** Human-readable explanation of what matched; omitted on healthy results. */
  reason?: string;
  /** Identifier of the check that fired (e.g. "pattern_loop"); omitted on healthy results. */
  patternName?: string;
}
13
+
/** Per-session anomaly bookkeeping entry. */
export interface AnomalyRecord {
  /** Session this record belongs to. */
  sessionId: string;
  /**
   * Anomaly counter for the session.
   * NOTE(review): the tests suggest this counts consecutive unhealthy
   * results — confirm against AnomalyTracker.
   */
  count: number;
  /** Reason of the most recently recorded anomaly (presumably SanityResult.reason). */
  lastReason: string;
  /**
   * Time of the most recent anomaly.
   * NOTE(review): unit/epoch is not visible here — presumably Date.now()
   * milliseconds; confirm.
   */
  lastTimestamp: number;
}
20
+
/** Tunable settings for anomaly escalation. */
export interface AnomalyTrackerConfig {
  /** Number of consecutive anomalies at which escalation is triggered. */
  maxConsecutiveAnomalies: number; // default 2
  /** Recovery message returned when escalation is triggered. */
  escalationMessage: string; // default "recovery: compact context"
}
@@ -0,0 +1,472 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Sanity Checker — tests
3
+ // ---------------------------------------------------------------------------
4
+
5
+ import { describe, it, before, after } from "node:test";
6
+ import assert from "node:assert/strict";
7
+ import { checkOutputSanity } from "./checker.ts";
8
+ import { AnomalyTracker } from "./anomaly-tracker.ts";
9
+ import type { SanityResult } from "./interfaces.ts";
10
+
11
+ // ── Helper ────────────────────────────────────────────────────────────
12
+
/**
 * Fails the current test unless `result` is a healthy verdict with
 * severity "ok".
 *
 * @param result - Verdict under test.
 * @param msg - Optional failure message; defaults to one that includes the
 *   verdict's reason.
 */
function assertHealthy(result: SanityResult, msg?: string): void {
  const failureText = msg ?? `Expected healthy, got: ${result.reason}`;
  assert.ok(result.isHealthy, failureText);
  assert.equal(result.severity, "ok");
}
17
+
/**
 * Fails the current test unless `result` is an unhealthy verdict with the
 * expected severity and, when given, the expected pattern name.
 *
 * @param result - Verdict under test.
 * @param expectedSeverity - Severity the verdict must carry.
 * @param expectedPattern - Pattern name the verdict must carry; not checked
 *   when omitted or empty.
 * @param msg - Optional message that replaces every default failure message.
 */
function assertUnhealthy(
  result: SanityResult,
  expectedSeverity: "warning" | "critical",
  expectedPattern?: string,
  msg?: string,
): void {
  const healthText = msg ?? "Expected unhealthy";
  assert.equal(result.isHealthy, false, healthText);

  const severityText = msg ?? `Expected severity ${expectedSeverity}, got ${result.severity}`;
  assert.equal(result.severity, expectedSeverity, severityText);

  // No pattern requested — nothing further to verify.
  if (!expectedPattern) {
    return;
  }

  const patternText = msg ?? `Expected pattern ${expectedPattern}, got ${result.patternName}`;
  assert.equal(result.patternName, expectedPattern, patternText);
}
38
+
39
+ // ── Tests ──────────────────────────────────────────────────────────────
40
+
// Exercises every detection pattern of checkOutputSanity with one positive
// and at least one negative fixture. Fixtures for a given pattern are built
// so that no earlier check in the checker's ordering fires first, and are at
// least 50 characters long where the short-output warning must be avoided.
describe("checkOutputSanity — detection patterns", () => {
  // ── 1. Single character repetition ────────────────────────────────

  it("detects single character repetition (16+ same chars)", () => {
    const result = checkOutputSanity(
      "Leading text " + "a".repeat(20) + " trailing text to exceed 50 chars total and avoid short output detection",
    );
    assertUnhealthy(result, "critical", "single_char_repetition");
  });

  it("allows short character repetition (< 16)", () => {
    // At least 50 chars to avoid short-output warning
    const text =
      "This line has " +
      "a".repeat(15) +
      " but not 16+ same chars, so it should definitely pass this check fine.";
    assert.ok(
      text.length >= 50,
      `Test string must be >= 50 chars (was ${text.length})`,
    );
    const result = checkOutputSanity(text);
    assertHealthy(result);
  });

  it("detects repeated spaces", () => {
    const text =
      "hello" +
      " ".repeat(20) +
      "world and some more text here to make the string exceed 50 chars in total so we avoid the short check";
    const result = checkOutputSanity(text);
    assertUnhealthy(result, "critical", "single_char_repetition");
  });

  // ── 2. Short pattern loop ─────────────────────────────────────────

  it("detects short pattern loop (9+ repeats)", () => {
    const result = checkOutputSanity(
      "prefix " + "ab".repeat(12) + " suffix that brings total well past 50 characters to avoid short output detection",
    );
    assertUnhealthy(result, "critical", "pattern_loop");
  });

  it("allows short repetitions (< 9)", () => {
    // 8 of each letter = 8*7 = 56 chars, no single-char run >= 16
    const text = "A".repeat(8) + "B".repeat(8) + "C".repeat(8) + "D".repeat(8) + "E".repeat(8) + "F".repeat(8) + "G".repeat(8);
    assert.ok(text.length >= 50, `Test string must be >= 50 chars (was ${text.length})`);
    const result = checkOutputSanity(text);
    assertHealthy(result);
  });

  it("detects longer pattern loop", () => {
    const result = checkOutputSanity(
      "start " + "hello".repeat(10) + " end with more text to exceed 50 character limit for short detection sure",
    );
    assertUnhealthy(result, "critical", "pattern_loop");
  });

  // ── 3. Low character diversity ────────────────────────────────────

  it("detects low character diversity", () => {
    // 600 chars with only 10 unique chars, no pattern loop (10-char pattern doesn't match 2-6)
    const text = "abcdefghij".repeat(60);
    const result = checkOutputSanity(text);
    assertUnhealthy(result, "critical", "low_diversity");
  });

  it("allows diverse text", () => {
    const text =
      "The quick brown fox jumps over the lazy dog. This sentence contains every letter of the alphabet at least once. ".repeat(
        5,
      );
    const result = checkOutputSanity(text);
    assertHealthy(result);
  });

  it("does not flag short text (< 200 chars)", () => {
    const text = "ab".repeat(99); // 198 chars, just under 200
    assert.ok(text.length < 200, `Text must be < 200 chars (was ${text.length})`);
    const result = checkOutputSanity(text);
    // Should not have low_diversity pattern
    assert.ok(result.isHealthy || result.patternName !== "low_diversity");
  });

  // ── 4. Visual gibberish / box drawing ─────────────────────────────

  it("detects excessive box drawing characters", () => {
    // Pure box art should match visual_gibberish (comes before low_diversity)
    const boxArt = "┌─┐│└─┘├─┤┬┴┼".repeat(50);
    const result = checkOutputSanity(boxArt);
    assertUnhealthy(result, "critical", "visual_gibberish");
  });

  it("does not flag moderate box drawing in context", () => {
    // Must be > 50 chars and not > 100 box chars with > 30% ratio
    const text =
      "┌───┐\n│ │\n└───┘\n" +
      "Here is a simple diagram frame with plenty of surrounding context text " +
      "that makes this string exceed 50 characters and avoids any pattern detection.";
    assert.ok(text.length >= 50, `Test string must be >= 50 chars (was ${text.length})`);
    const result = checkOutputSanity(text);
    assertHealthy(result);
  });

  // ── 5. CJK character spam ────────────────────────────────────────

  it("detects CJK character spam", () => {
    // Use a 9-char CJK string (avoids the 2-6 char pattern loop) repeated.
    // The checker only reports cjk_spam for MORE than 200 CJK chars drawn from
    // FEWER than 10 distinct ones, so the fixture must stay at 9 unique chars.
    const cjkSpam = "天地玄黄宇宙洪荒日".repeat(40); // 360 CJK chars, 9 unique
    assert.equal(cjkSpam.length, 360, `Fixture must be 360 chars (was ${cjkSpam.length})`);
    assert.equal(new Set(cjkSpam).size, 9, "Fixture must contain exactly 9 unique CJK chars");
    const result = checkOutputSanity(cjkSpam);
    assertUnhealthy(result, "critical", "cjk_spam");
  });

  it("allows legitimate CJK text", () => {
    const cjkText = "这是一个正常的句子。它有各种各样的字符和不同的表达方式。".repeat(5);
    const result = checkOutputSanity(cjkText);
    assertHealthy(result);
  });

  // ── 6. Empty/tiny output ──────────────────────────────────────────

  it("flags suspicious short output", () => {
    const result = checkOutputSanity("x");
    assertUnhealthy(result, "warning", "output_too_short");
  });

  it("allows single-word status output", () => {
    assertHealthy(checkOutputSanity("ok"));
    assertHealthy(checkOutputSanity("done"));
    assertHealthy(checkOutputSanity("passed"));
    assertHealthy(checkOutputSanity("true"));
  });

  it("allows numeric output", () => {
    assertHealthy(checkOutputSanity("42"));
    assertHealthy(checkOutputSanity("3.14159"));
  });

  // ── 7. Error stack bleed ──────────────────────────────────────────

  it("detects excessive error stack lines", () => {
    const errorStack = [
      "Error: something went wrong",
      " at Object.<anonymous> (file.ts:10:5)",
      " at Module._compile (module.js:653:30)",
      " at Object.Module._extensions (module.js:664:10)",
      " at Module.load (module.js:566:32)",
      " at tryModuleLoad (module.js:506:12)",
    ].join("\n");
    assert.ok(errorStack.length >= 50, `Test string must be >= 50 chars (was ${errorStack.length})`);
    const result = checkOutputSanity(errorStack);
    assertUnhealthy(result, "warning", "error_stack_bleed");
  });

  it("allows normal error mentions", () => {
    const text =
      "We got an Error: not found, but handled it gracefully with fallback logic " +
      "that continues execution without any problems whatsoever.";
    const result = checkOutputSanity(text);
    assertHealthy(result);
  });

  // ── 8. Line-by-line repetition ────────────────────────────────────

  it("detects excessive line repetition", () => {
    // Use a line with many unique characters to avoid low_diversity
    const line =
      "Sphinx of black quartz, judge my vow! The five boxing wizards jump quickly. 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const repeatedLines = Array.from({ length: 20 }, () => line).join("\n");
    const result = checkOutputSanity(repeatedLines);
    assertUnhealthy(result, "warning", "line_repetition");
  });

  it("allows normal line variation", () => {
    const normalLines = [
      "First line of unique content here for the test case.",
      "Second line is different from all the rest.",
      "Third line is also unique in its own way.",
      "Fourth line continues the thought process forward.",
      "Fifth line wraps up the opening section nicely.",
      "Sixth line adds more context to the discussion.",
      "Seventh line explores new ideas and concepts.",
      "Eighth line approaches the question differently.",
      "Ninth line concludes the main arguments well.",
      "Tenth line is the final summary statement.",
      "Eleventh line surprises everyone with extra depth.",
      "Twelfth line brings the total to a solid dozen.",
    ].join("\n");
    const result = checkOutputSanity(normalLines);
    assertHealthy(result);
  });

  // ── Mixed / edge cases ───────────────────────────────────────────

  it("returns healthy for normal prose", () => {
    const prose =
      "This is a normal paragraph of text that should pass all sanity checks. " +
      "It contains varied characters and meaningful content. The quick brown fox jumps over the lazy dog. " +
      "No patterns of degeneration should be detected here.";
    assertHealthy(checkOutputSanity(prose));
  });

  it("flags empty string as warning", () => {
    const result = checkOutputSanity("");
    assertUnhealthy(result, "warning", "empty_output");
  });

  it("flags null/undefined as critical", () => {
    const r1 = checkOutputSanity(null);
    assertUnhealthy(r1, "critical", "empty_output");
    const r2 = checkOutputSanity(undefined);
    assertUnhealthy(r2, "critical", "empty_output");
  });

  it("detects multiple patterns (first match wins)", () => {
    // Text dominated by an "ab" loop. The checker returns on the first
    // matching check, so pattern_loop must be the reported pattern. (The text
    // is under 200 chars, so the low-diversity check would not run on it.)
    const degenerate = "ab".repeat(50) + " extra unique text that varies the output so it stays above fifty characters";
    const result = checkOutputSanity(degenerate);
    assertUnhealthy(result, "critical");
    // Pattern loop should win since it comes first
    assert.equal(result.patternName, "pattern_loop");
  });
});
263
+
264
+ // ── AnomalyTracker tests ──────────────────────────────────────────────
265
+
// Exercises AnomalyTracker: per-session recording, escalation thresholds,
// record lookup/clearing and configuration overrides. All tests use the
// instance returned by AnomalyTracker.getInstance(), so each test starts by
// calling resetAll() and any configuration override is undone in a `finally`
// block to keep a failing assertion from leaking state into later tests.
describe("AnomalyTracker", () => {
  let tracker: AnomalyTracker;

  before(() => {
    tracker = AnomalyTracker.getInstance();
    tracker.resetAll();
  });

  after(() => {
    tracker.resetAll();
  });

  // ── Recording ────────────────────────────────────────────────────

  it("records healthy output — resets counter", () => {
    const healthy: SanityResult = { isHealthy: true, severity: "ok" };
    const result = tracker.record("s1", healthy);
    assert.equal(result.shouldEscalate, false);
    assert.equal(result.consecutiveAnomalies, 0);
  });

  it("records single anomaly — no escalation", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Single character repetition",
      patternName: "single_char_repetition",
    };

    const result = tracker.record("s2", unhealthy);
    assert.equal(result.shouldEscalate, false);
    assert.equal(result.consecutiveAnomalies, 1);
  });

  it("triggers escalation on 2+ consecutive anomalies", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Pattern loop detected",
      patternName: "pattern_loop",
    };

    // First anomaly — no escalation
    const first = tracker.record("s3", unhealthy);
    assert.equal(first.shouldEscalate, false);
    assert.equal(first.consecutiveAnomalies, 1);

    // Second consecutive anomaly — escalation
    const second = tracker.record("s3", unhealthy);
    assert.equal(second.shouldEscalate, true);
    assert.equal(second.consecutiveAnomalies, 2);
    assert.ok(second.recoveryMessage, "recovery message should be present");
    assert.equal(second.recoveryMessage, "recovery: compact context");
  });

  it("resets counter on healthy output between anomalies", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "warning",
      reason: "Output too short",
      patternName: "output_too_short",
    };
    const healthy: SanityResult = { isHealthy: true, severity: "ok" };

    tracker.record("s4", unhealthy); // count=1
    tracker.record("s4", healthy); // reset to 0
    const result = tracker.record("s4", unhealthy); // count=1 again
    assert.equal(result.shouldEscalate, false);
    assert.equal(result.consecutiveAnomalies, 1);
  });

  it("persists 3+ consecutive anomalies", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "warning",
      reason: "Error stack bleed",
      patternName: "error_stack_bleed",
    };

    tracker.record("s5", unhealthy); // 1
    tracker.record("s5", unhealthy); // 2 → escalation
    const third = tracker.record("s5", unhealthy); // 3 → escalation
    assert.equal(third.shouldEscalate, true);
    assert.equal(third.consecutiveAnomalies, 3);
  });

  // ── getRecord ────────────────────────────────────────────────────

  it("getRecord() returns record for existing session", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Low info density",
      patternName: "low_diversity",
    };
    tracker.record("s6", unhealthy);

    const record = tracker.getRecord("s6");
    assert.ok(record);
    assert.equal(record.count, 1);
    assert.equal(record.lastReason, "Low info density");
  });

  it("getRecord() returns undefined for unknown session", () => {
    const record = tracker.getRecord("nonexistent");
    assert.equal(record, undefined);
  });

  // ── clearSession ──────────────────────────────────────────────────

  it("clearSession() removes record for a session", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Pattern loop",
      patternName: "pattern_loop",
    };

    tracker.record("s7", unhealthy);
    assert.ok(tracker.getRecord("s7"));

    tracker.clearSession("s7");
    assert.equal(tracker.getRecord("s7"), undefined);
  });

  it("clearSession() does not affect other sessions", () => {
    tracker.resetAll();
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Test",
      patternName: "single_char_repetition",
    };

    tracker.record("s8_a", unhealthy);
    tracker.record("s8_b", unhealthy);

    tracker.clearSession("s8_a");
    assert.equal(tracker.getRecord("s8_a"), undefined);
    assert.ok(tracker.getRecord("s8_b"));
  });

  // ── resetAll ─────────────────────────────────────────────────────

  it("resetAll() clears all records", () => {
    const unhealthy: SanityResult = {
      isHealthy: false,
      severity: "critical",
      reason: "Test",
      patternName: "single_char_repetition",
    };

    tracker.record("s9", unhealthy);
    tracker.resetAll();
    assert.equal(tracker.getRecord("s9"), undefined);
  });

  // ── config ────────────────────────────────────────────────────────

  it("uses configurable maxConsecutiveAnomalies", () => {
    tracker.resetAll();
    tracker.setConfig({ maxConsecutiveAnomalies: 3 });

    try {
      const unhealthy: SanityResult = {
        isHealthy: false,
        severity: "warning",
        reason: "Test threshold",
        patternName: "output_too_short",
      };

      const first = tracker.record("s10", unhealthy); // 1
      assert.equal(first.shouldEscalate, false);

      const second = tracker.record("s10", unhealthy); // 2
      assert.equal(second.shouldEscalate, false);

      const third = tracker.record("s10", unhealthy); // 3 → escalation
      assert.equal(third.shouldEscalate, true);
    } finally {
      // Restore default even when an assertion above fails
      tracker.setConfig({ maxConsecutiveAnomalies: 2 });
    }
  });

  it("uses configurable escalationMessage", () => {
    tracker.resetAll();
    tracker.setConfig({ escalationMessage: "custom recovery action: deep reset" });

    try {
      const unhealthy: SanityResult = {
        isHealthy: false,
        severity: "critical",
        reason: "Test message",
        patternName: "pattern_loop",
      };

      tracker.record("s11", unhealthy); // 1
      const result = tracker.record("s11", unhealthy); // 2 → escalation
      assert.equal(result.recoveryMessage, "custom recovery action: deep reset");
    } finally {
      // Restore default even when an assertion above fails
      tracker.setConfig({ escalationMessage: "recovery: compact context" });
    }
  });
});