openhermes 4.12.1 → 4.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTEXT.md +6 -6
- package/ETHOS.md +2 -2
- package/README.md +11 -17
- package/bootstrap.ts +118 -126
- package/docs/HOW-IT-WORKS.md +162 -0
- package/docs/adr/ADR-0001-rebuild-vs-increment.md +30 -0
- package/docs/adr/ADR-0002-routing-graph-vs-linear-chain.md +36 -0
- package/docs/adr/ADR-0003-per-directory-plan-storage.md +34 -0
- package/docs/adr/ADR-0004-composer-fragment-architecture.md +42 -0
- package/docs/adr/ADR-0005-hook-system-design.md +42 -0
- package/docs/adr/README.md +9 -0
- package/harness/codex/AUTOPILOT.md +35 -40
- package/harness/codex/CHARTER.md +3 -3
- package/harness/lib/composer/compose.test.ts +29 -29
- package/harness/lib/composer/fragments/02-delegation.md +5 -5
- package/harness/lib/composer/fragments/04-task-flow.md +13 -13
- package/harness/lib/composer/fragments/08-routing.md +1 -1
- package/harness/lib/composer/fragments/09-guardrails.md +25 -25
- package/harness/lib/composer/index.ts +1 -1
- package/harness/lib/guards/guard-config.ts +72 -72
- package/harness/lib/hooks/builtins/confidence-gate-hook.ts +9 -9
- package/harness/lib/hooks/builtins/delegation-depth-hook.ts +1 -1
- package/harness/lib/hooks/builtins/dynamic-route-hook.ts +99 -99
- package/harness/lib/hooks/builtins/next-route-hook.ts +24 -24
- package/harness/lib/hooks/builtins/plan-check-hook.ts +5 -5
- package/harness/lib/hooks/builtins/route-tracking-hook.ts +1 -1
- package/harness/lib/hooks/hooks.test.ts +160 -324
- package/harness/lib/hooks/index.ts +38 -42
- package/harness/lib/hooks/registry.ts +309 -416
- package/harness/lib/hooks/types.ts +116 -119
- package/harness/lib/plans/plan-location.ts +134 -134
- package/harness/lib/routing/index.ts +21 -21
- package/harness/lib/routing/route-guidance.ts +147 -147
- package/harness/lib/routing/route-resolver.ts +58 -58
- package/harness/lib/routing/routing.test.ts +195 -195
- package/harness/lib/routing/skill-frontmatter.ts +125 -125
- package/harness/lib/routing/types.ts +52 -52
- package/harness/skills/oh-ascii/SKILL.md +1 -1
- package/harness/skills/oh-fusion/DEEP.md +109 -109
- package/harness/skills/oh-fusion/SKILL.md +47 -47
- package/harness/skills/oh-init/DEEP.md +2 -2
- package/harness/skills/oh-plan-review/DEEP.md +1 -1
- package/harness/skills/oh-planner/DEEP.md +3 -3
- package/harness/skills/oh-review/DEEP.md +5 -5
- package/package.json +56 -53
- package/harness/lib/background/background.test.ts +0 -216
- package/harness/lib/background/index.ts +0 -7
- package/harness/lib/background/interfaces.ts +0 -31
- package/harness/lib/background/manager.ts +0 -320
- package/harness/lib/hooks/builtins/error-recovery-hook.ts +0 -107
- package/harness/lib/hooks/builtins/memory-sync-hook.ts +0 -73
- package/harness/lib/hooks/builtins/sanity-check-hook.ts +0 -52
- package/harness/lib/hooks/builtins/subagent-failure-hook.ts +0 -93
- package/harness/lib/memory/index.ts +0 -18
- package/harness/lib/memory/interfaces.ts +0 -53
- package/harness/lib/memory/memory-manager.ts +0 -205
- package/harness/lib/memory/memory.test.ts +0 -485
- package/harness/lib/memory/plan-store.ts +0 -346
- package/harness/lib/recovery/handler.ts +0 -243
- package/harness/lib/recovery/index.ts +0 -14
- package/harness/lib/recovery/interfaces.ts +0 -48
- package/harness/lib/recovery/patterns.ts +0 -149
- package/harness/lib/recovery/recovery.test.ts +0 -312
- package/harness/lib/sanity/anomaly-tracker.ts +0 -127
- package/harness/lib/sanity/checker.ts +0 -189
- package/harness/lib/sanity/index.ts +0 -13
- package/harness/lib/sanity/interfaces.ts +0 -24
- package/harness/lib/sanity/sanity.test.ts +0 -472
- package/harness/lib/sync/file-watcher.ts +0 -175
- package/harness/lib/sync/index.ts +0 -11
- package/harness/lib/sync/interfaces.ts +0 -27
- package/harness/lib/sync/plan-sync.ts +0 -533
- package/harness/lib/sync/sync.test.ts +0 -858
|
@@ -1,189 +0,0 @@
|
|
|
1
|
-
// ---------------------------------------------------------------------------
|
|
2
|
-
// Output Sanity Checker — detect LLM output degeneration patterns
|
|
3
|
-
// ---------------------------------------------------------------------------
|
|
4
|
-
|
|
5
|
-
import type { SanityResult } from "./interfaces.ts";
|
|
6
|
-
import { AnomalyTracker } from "./anomaly-tracker.ts";
|
|
7
|
-
|
|
8
|
-
/** Short status replies that are legitimate even though they are tiny. */
const MINIMAL_STATUS_WORDS: ReadonlySet<string> = new Set([
  "ok",
  "done",
  "yes",
  "no",
  "passed",
  "failed",
  "error",
  "null",
  "undefined",
  "true",
  "false",
]);

/** Unique-chars / length ratio below which output counts as degenerate. */
const MIN_CHAR_DIVERSITY = 0.02;

/**
 * Largest length used as the denominator of the diversity ratio.
 *
 * The number of distinct characters in ordinary text is bounded by its
 * alphabet, so an uncapped `unique / length` ratio shrinks as the output
 * grows and eventually drops below the threshold for perfectly healthy long
 * answers. Capping the denominator keeps the check meaningful for long text
 * while leaving results for outputs up to this length unchanged.
 */
const DIVERSITY_LENGTH_CAP = 1000;

/**
 * Check a value for LLM output degeneration patterns.
 *
 * Checks run in a fixed order and the first one that fires determines the
 * result:
 *
 *  - non-string input (critical) and empty string (warning);
 *  - critical checks: single-character repetition, short pattern loop,
 *    box/block/Braille character flood, CJK spam, low character diversity;
 *  - warning checks: error-stack bleed, line repetition, too-short output,
 *    and finally the cross-invocation duplicate check.
 *
 * @param text - The output to inspect. Anything that is not a string is
 *   reported as a critical `empty_output`.
 * @param anomalyTracker - Optional tracker used for the duplicate-output
 *   check. Its `trackOutput` is only called when no earlier check fired, so
 *   outputs rejected by an earlier check are never passed to the tracker.
 * @returns `{ isHealthy: true, severity: "ok" }` when nothing matched,
 *   otherwise an unhealthy result carrying `severity`, `reason` and
 *   `patternName`.
 */
export function checkOutputSanity(
  text: unknown,
  anomalyTracker?: AnomalyTracker,
): SanityResult {
  if (typeof text !== "string") {
    return {
      isHealthy: false,
      severity: "critical",
      reason: "Output is not a string (possibly undefined/null)",
      patternName: "empty_output",
    };
  }
  if (text.length === 0) {
    return {
      isHealthy: false,
      severity: "warning",
      reason: "Output is an empty string",
      patternName: "empty_output",
    };
  }

  // ═══════════════════════════════════════════════════════════════════
  // CRITICAL checks — severe degeneration
  // ═══════════════════════════════════════════════════════════════════

  // ── 1. Single character repetition ──────────────────────────────
  // 16+ consecutive identical characters.
  // NOTE(review): `.` also matches spaces, tabs and punctuation, so 16 spaces
  // of indentation or a "----------------" rule is reported here as well.
  // Confirm that this is intended before relying on it for code output.
  const singleCharMatch = text.match(/(.)\1{15,}/);
  if (singleCharMatch) {
    return {
      isHealthy: false,
      severity: "critical",
      reason: `Single character repetition detected: "${singleCharMatch[0].slice(0, 20)}..."`,
      patternName: "single_char_repetition",
    };
  }

  // ── 2. Short pattern loop ───────────────────────────────────────
  // 9+ back-to-back repetitions of a 2-6 character sequence.
  const patternLoopMatch = text.match(/(.{2,6})\1{8,}/);
  if (patternLoopMatch) {
    return {
      isHealthy: false,
      severity: "critical",
      reason: `Pattern loop detected: "${patternLoopMatch[0].slice(0, 30)}..."`,
      patternName: "pattern_loop",
    };
  }

  // ── 3. Excessive box/block drawing characters ───────────────────
  // Unicode box drawing, block elements, and Braille patterns. Both an
  // absolute count (> 100) and a share of the output (> 30%) are required.
  const boxDrawChars = text.match(/[\u2500-\u257f\u2580-\u259f\u2800-\u28ff]/g);
  if (boxDrawChars && boxDrawChars.length > 100) {
    const ratio = boxDrawChars.length / text.length;
    if (ratio > 0.3) {
      return {
        isHealthy: false,
        severity: "critical",
        reason: `Visual gibberish detected: ${boxDrawChars.length} box/block chars (${(ratio * 100).toFixed(1)}% of output)`,
        patternName: "visual_gibberish",
      };
    }
  }

  // ── 4. CJK character spam ─────────────────────────────────────
  // Lots of CJK characters drawn from a very small set of distinct ones.
  const cjkChars = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g);
  if (cjkChars && cjkChars.length > 200) {
    const uniqueCjk = new Set(cjkChars).size;
    if (uniqueCjk < 10 && cjkChars.length / uniqueCjk > 20) {
      return {
        isHealthy: false,
        severity: "critical",
        reason: `CJK character spam detected: ${cjkChars.length} chars, ${uniqueCjk} unique (ratio ${(cjkChars.length / uniqueCjk).toFixed(0)})`,
        patternName: "cjk_spam",
      };
    }
  }

  // ── 5. Low character diversity ────────────────────────────────
  // General catch-all for text built from very few distinct characters.
  if (text.length > 200) {
    const cleanText = text.replace(/\s/g, "");
    if (cleanText.length > 0) {
      const uniqueChars = new Set(cleanText).size;
      // Raw ratio, reported in the message.
      const diversity = uniqueChars / cleanText.length;
      // Ratio used for the decision: the denominator is capped so that long,
      // healthy text (bounded alphabet, growing length) is not flagged.
      // Because the capped ratio is never smaller than the raw one, the
      // "< 0.02" statement in the message stays true whenever this fires.
      const cappedDiversity =
        uniqueChars / Math.min(cleanText.length, DIVERSITY_LENGTH_CAP);
      if (cappedDiversity < MIN_CHAR_DIVERSITY) {
        return {
          isHealthy: false,
          severity: "critical",
          reason: `Low information density: ${uniqueChars} unique chars out of ${cleanText.length} (ratio ${diversity.toFixed(4)} < 0.02)`,
          patternName: "low_diversity",
        };
      }
    }
  }

  // ═══════════════════════════════════════════════════════════════════
  // WARNING checks — mild or context-dependent issues
  // ═══════════════════════════════════════════════════════════════════

  // ── 6. Error stack bleed ────────────────────────────────────────
  // One pass over the lines collects data for checks 6 and 7.
  const lines = text.split(/\r?\n/);
  let errorStackLineCount = 0;
  const repetitionLines: string[] = [];

  for (const line of lines) {
    const trimmedLine = line.trim();
    if (
      line.includes("Error:") ||
      trimmedLine.startsWith("at ") ||
      line.includes("Exception:")
    ) {
      errorStackLineCount++;
    }

    // Only lines with more than 10 visible characters take part in the
    // repetition check; the untrimmed line is what gets compared.
    if (trimmedLine.length > 10) {
      repetitionLines.push(line);
    }
  }

  if (errorStackLineCount > 5) {
    return {
      isHealthy: false,
      severity: "warning",
      reason: `Error stack bleed detected: ${errorStackLineCount} error/stack lines`,
      patternName: "error_stack_bleed",
    };
  }

  // ── 7. Line-by-line repetition ──────────────────────────────────
  // More than 10 candidate lines of which fewer than 20% are distinct.
  if (repetitionLines.length > 10) {
    const uniqueLines = new Set(repetitionLines);
    if (uniqueLines.size < repetitionLines.length * 0.2) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Excessive line repetition: ${uniqueLines.size} unique lines out of ${repetitionLines.length} (${(uniqueLines.size / repetitionLines.length * 100).toFixed(0)}% unique)`,
        patternName: "line_repetition",
      };
    }
  }

  // ── 8. Tiny output ──────────────────────────────────────────────
  // Outputs under 50 characters are suspicious unless they are a known
  // status word or a plain number. (The empty string was handled above.)
  if (text.length < 50) {
    const normalized = text.trim().toLowerCase();
    if (!MINIMAL_STATUS_WORDS.has(normalized) && !/^[\d.]+$/.test(normalized)) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Output too short: ${text.length} characters`,
        patternName: "output_too_short",
      };
    }
  }

  // ── 9. Cross-invocation dedup check ─────────────────────────────
  if (anomalyTracker) {
    const isRepeated = anomalyTracker.trackOutput(text);
    if (isRepeated) {
      return {
        isHealthy: false,
        severity: "warning",
        reason: `Output identical to previous ${anomalyTracker.MAX_IDENTICAL_OUTPUTS} invocations`,
        patternName: "repeated_identical_output",
      };
    }
  }

  // No pattern matched — healthy
  return { isHealthy: true, severity: "ok" };
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
// ---------------------------------------------------------------------------
|
|
2
|
-
// Sanity Checker module — barrel export
|
|
3
|
-
// ---------------------------------------------------------------------------
|
|
4
|
-
|
|
5
|
-
export type {
|
|
6
|
-
Severity,
|
|
7
|
-
SanityResult,
|
|
8
|
-
AnomalyRecord,
|
|
9
|
-
AnomalyTrackerConfig,
|
|
10
|
-
} from "./interfaces.ts";
|
|
11
|
-
|
|
12
|
-
export { checkOutputSanity } from "./checker.ts";
|
|
13
|
-
export { AnomalyTracker } from "./anomaly-tracker.ts";
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
// ---------------------------------------------------------------------------
|
|
2
|
-
// Sanity Checker — type definitions
|
|
3
|
-
// ---------------------------------------------------------------------------
|
|
4
|
-
|
|
5
|
-
/** Level attached to a sanity result: "ok" for healthy output, otherwise "warning" or "critical". */
export type Severity = "ok" | "warning" | "critical";
|
|
6
|
-
|
|
7
|
-
/**
 * Outcome of a single output sanity check, as returned by `checkOutputSanity`.
 * Healthy results carry only `isHealthy` and `severity`; unhealthy results
 * additionally carry `reason` and `patternName`.
 */
export interface SanityResult {
|
|
8
|
-
/** True when no degeneration pattern matched. */
isHealthy: boolean;
|
|
9
|
-
/** "ok" for healthy output, otherwise how severe the matched pattern is. */
severity: Severity;
|
|
10
|
-
/** Human-readable description of what was detected; set on unhealthy results. */
reason?: string;
|
|
11
|
-
/** Stable identifier of the matched check (e.g. "pattern_loop"); set on unhealthy results. */
patternName?: string;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
/**
 * Bookkeeping entry about anomalies for one session.
 * NOTE(review): the code that reads and writes this record is not visible
 * here (presumably anomaly-tracker.ts); the field notes below are inferred
 * from the names and should be confirmed against that module.
 */
export interface AnomalyRecord {
|
|
15
|
-
// presumably the identifier of the session this record belongs to — TODO confirm
sessionId: string;
|
|
16
|
-
// presumably the number of anomalies recorded so far — TODO confirm whether consecutive or total
count: number;
|
|
17
|
-
// presumably the `reason` of the most recent unhealthy result — TODO confirm
lastReason: string;
|
|
18
|
-
// presumably when the last anomaly was recorded; unit (ms vs s) not shown — TODO confirm
lastTimestamp: number;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
 * Configuration for the anomaly tracker. Both fields are required by this
 * type; the defaults mentioned in the trailing comments are not applied here.
 * NOTE(review): presumably the tracker supplies those defaults and escalates
 * once `maxConsecutiveAnomalies` is reached — confirm against the tracker
 * implementation, which is not visible here.
 */
export interface AnomalyTrackerConfig {
|
|
22
|
-
maxConsecutiveAnomalies: number; // default 2
|
|
23
|
-
escalationMessage: string; // default "recovery: compact context"
|
|
24
|
-
}
|