loreli 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +66 -26
  2. package/package.json +17 -14
  3. package/packages/action/prompts/action.md +172 -0
  4. package/packages/action/src/index.js +33 -5
  5. package/packages/agent/README.md +107 -18
  6. package/packages/agent/src/backends/claude.js +111 -11
  7. package/packages/agent/src/backends/codex.js +78 -5
  8. package/packages/agent/src/backends/cursor.js +104 -27
  9. package/packages/agent/src/backends/index.js +162 -5
  10. package/packages/agent/src/cli.js +80 -3
  11. package/packages/agent/src/discover.js +396 -0
  12. package/packages/agent/src/factory.js +39 -34
  13. package/packages/agent/src/models.js +24 -6
  14. package/packages/classify/README.md +136 -0
  15. package/packages/classify/prompts/blocker.md +12 -0
  16. package/packages/classify/prompts/feedback.md +14 -0
  17. package/packages/classify/prompts/pane-state.md +20 -0
  18. package/packages/classify/src/index.js +81 -0
  19. package/packages/config/README.md +156 -91
  20. package/packages/config/src/defaults.js +32 -21
  21. package/packages/config/src/index.js +33 -2
  22. package/packages/config/src/schema.js +57 -39
  23. package/packages/hub/src/github.js +59 -20
  24. package/packages/identity/README.md +1 -1
  25. package/packages/identity/src/index.js +2 -2
  26. package/packages/knowledge/README.md +86 -106
  27. package/packages/knowledge/src/index.js +56 -225
  28. package/packages/mcp/README.md +51 -7
  29. package/packages/mcp/instructions.md +6 -1
  30. package/packages/mcp/scaffolding/loreli.yml +115 -77
  31. package/packages/mcp/scaffolding/mcp-configs/.codex/config.toml +1 -0
  32. package/packages/mcp/scaffolding/mcp-configs/.cursor/mcp.json +4 -1
  33. package/packages/mcp/scaffolding/mcp-configs/.mcp.json +4 -1
  34. package/packages/mcp/src/index.js +45 -16
  35. package/packages/mcp/src/tools/agent-context.js +44 -0
  36. package/packages/mcp/src/tools/agents.js +34 -13
  37. package/packages/mcp/src/tools/context.js +3 -2
  38. package/packages/mcp/src/tools/github.js +11 -47
  39. package/packages/mcp/src/tools/hitl.js +19 -6
  40. package/packages/mcp/src/tools/index.js +2 -1
  41. package/packages/mcp/src/tools/refactor.js +227 -0
  42. package/packages/mcp/src/tools/repo.js +44 -0
  43. package/packages/mcp/src/tools/start.js +159 -90
  44. package/packages/mcp/src/tools/status.js +5 -2
  45. package/packages/mcp/src/tools/work.js +18 -8
  46. package/packages/orchestrator/src/index.js +345 -79
  47. package/packages/planner/README.md +84 -1
  48. package/packages/planner/prompts/plan-reviewer.md +109 -0
  49. package/packages/planner/prompts/planner.md +191 -0
  50. package/packages/planner/prompts/tiebreaker-reviewer.md +71 -0
  51. package/packages/planner/src/index.js +326 -111
  52. package/packages/review/README.md +2 -2
  53. package/packages/review/prompts/reviewer.md +158 -0
  54. package/packages/review/src/index.js +196 -76
  55. package/packages/risk/README.md +81 -22
  56. package/packages/risk/prompts/risk.md +272 -0
  57. package/packages/risk/src/index.js +44 -33
  58. package/packages/tmux/src/index.js +61 -12
  59. package/packages/workflow/README.md +18 -14
  60. package/packages/workflow/prompts/preamble.md +14 -0
  61. package/packages/workflow/src/index.js +191 -12
  62. package/packages/workspace/README.md +2 -2
  63. package/packages/workspace/src/index.js +69 -18
@@ -4,21 +4,82 @@ Risk assessment workflow for Loreli's orchestration pipeline. Extends the `Workf
4
4
 
5
5
  ## Research Findings
6
6
 
7
- No existing npm packages perform LLM-driven PR risk assessment with label-based routing. This is domain-specific to Loreli's adversarial review model.
7
+ No existing npm packages perform LLM-driven PR risk assessment with label-based routing. This is domain-specific to Loreli's adversarial review model. Calibration techniques (file categorization, test coverage gap detection, numeric anchors, structured per-criterion scoring) were informed by the [PR Impact](https://github.com/ducdmdev/pr-impact) skill.
8
8
 
9
9
  ## How It Works
10
10
 
11
11
  RiskWorkflow runs **before** ReviewWorkflow in the reactor chain. When a new PR appears from an action agent, the risk workflow:
12
12
 
13
- 1. **Assess** — Dispatches an opposing-provider risk agent with the PR's diff, file stats, linked issue body, and planning objective.
13
+ 1. **Assess** — Dispatches an opposing-provider risk agent with the PR's diff, file stats, aggregate stats, linked issue body, and planning objective.
14
14
  2. **Verdict** — Reads the risk agent's verdict comment (posted via the `comment` MCP tool with `risk: true`). The comment tool applies the appropriate GitHub label as a side effect.
15
15
  3. **Route** — Based on the verdict label:
16
16
  - `loreli:low-risk` — PR passes through to `ReviewWorkflow.scan()` for normal reviewer dispatch.
17
17
  - `loreli:medium-risk` — PR passes through with risk context attached to the reviewer prompt.
18
18
  - `loreli:critical-risk` — PR is escalated to HITL. No reviewer is dispatched.
19
+ - `loreli:risk-unassessed` — Assessment failed (dispatch error, timeout, dead agent). Escalated to HITL as a fail-safe.
19
20
 
20
21
  The contract between risk and review is **GitHub labels** — visible, auditable, and filterable. No shared in-memory state is needed.
21
22
 
23
+ ### Fail-Safe Behavior
24
+
25
+ When risk assessment cannot complete (dispatch failure, stall timeout, or dead agent reap), the workflow applies `loreli:risk-unassessed` instead of `loreli:low-risk`. This ensures unassessed PRs are escalated to human review rather than silently passing through to automated review.
26
+
27
+ ## Prompt Structure
28
+
29
+ The risk prompt (`prompts/risk.md`) uses a structured multi-step evaluation process:
30
+
31
+ ### Step 0 — Evidence Sufficiency
32
+
33
+ Before any evaluation, the agent checks whether the diff and context are complete. Truncated diffs or missing context prevent LOW verdicts and may force CRITICAL when combined with destructive signals.
34
+
35
+ ### Step 1 — File Categorization
36
+
37
+ Every changed file is classified into one of five categories:
38
+
39
+ | Category | Examples |
40
+ |----------|----------|
41
+ | **source** | `.js`, `.ts`, `.jsx`, `.tsx`, `.mjs`, `.cjs` (excluding tests) |
42
+ | **test** | Files in `test/`, `__tests__/`, or matching `*.test.*`, `*.spec.*` |
43
+ | **docs** | `.md`, `.mdx`, `README`, `CHANGELOG` |
44
+ | **config** | `package.json`, lock files, CI workflows, `Dockerfile`, `tsconfig.json` |
45
+ | **other** | Assets, generated files, data |
46
+
47
+ Tallies are referenced by subsequent criteria.
48
+
49
+ ### Step 2 — Evaluate Criteria
50
+
51
+ Seven criteria are evaluated in order, each assigned a signal level:
52
+
53
+ | Level | Meaning |
54
+ |-------|---------|
55
+ | **none** | No concern detected |
56
+ | **minor** | Small signal, unlikely to affect verdict |
57
+ | **elevated** | Meaningful concern, contributes to MEDIUM |
58
+ | **severe** | Strong signal, contributes to CRITICAL or demands justification |
59
+
60
+ The criteria:
61
+
62
+ 1. **Proportionality** — Are changes proportional to the objective? Calibration: >20 files = large, >500 lines = substantial, >1000 = outsized.
63
+ 2. **Destructiveness** — Does the PR remove significant code without replacement?
64
+ 3. **Infrastructure Impact** — Does it modify files with outsized blast radius (`package.json`, lock files, CI, `Dockerfile`, etc.)?
65
+ 4. **Reversibility** — How difficult to undo?
66
+ 5. **Scope Creep** — Changes unrelated to the issue?
67
+ 6. **Documentation Gap** — Architectural changes without corresponding docs?
68
+ 7. **Test Coverage Gap** — Source changes without corresponding test file changes? Zero test files alongside source changes = elevated at minimum.
69
+
70
+ ### Hard Escalation Triggers
71
+
72
+ CRITICAL is assigned immediately when any of these hold:
73
+
74
+ 1. Objective requests minor/docs-only work but PR is severely destructive or infrastructure-impacting.
75
+ 2. Critical infrastructure/security files are removed without objective support.
76
+ 3. Two or more criteria rated severe.
77
+ 4. Evidence is incomplete with elevated/severe destructiveness or infrastructure risk.
78
+
79
+ ### Step 3 — Structured Verdict
80
+
81
+ The agent posts a structured comment containing file category tallies, per-criterion signal levels with evidence, escalation triggers fired, and a verdict paragraph.
82
+
22
83
  ## API Reference
23
84
 
24
85
  ### `RiskWorkflow` (extends Workflow)
@@ -41,13 +102,13 @@ const risk = new RiskWorkflow(orchestrator, hub);
41
102
  #### `risk.assess(repo)` → Promise\<Array\<{pr, agent}\>\>
42
103
 
43
104
  Scan for new PRs from action agents and dispatch risk agents. Skips PRs that:
44
- - Already have a risk label (previously assessed)
105
+ - Already have a risk label or `risk-unassessed` label
45
106
  - Are currently being assessed (`_assessing` map)
46
107
  - Were already assessed (`_assessed` set)
47
108
 
48
- Returns early with an empty array when `review.skipRiskAssessment` is `true`.
109
+ Returns early with an empty array when `workflows.risk.skip` is `true`.
49
110
 
50
- When enlisting a risk agent fails, the PR is marked as assessed so `ReviewWorkflow.scan()` can proceed without the risk gate blocking it.
111
+ When dispatch fails, the PR receives `loreli:risk-unassessed` (fail-safe) and is marked assessed so `ReviewWorkflow.scan()` can escalate to HITL.
51
112
 
52
113
  #### `risk.verdict(repo)` → Promise\<Array\<{pr, level}\>\>
53
114
 
@@ -57,11 +118,11 @@ Check for risk verdicts on PRs being assessed. For each PR in `_assessing`:
57
118
  - Kills the risk agent
58
119
  - Moves the PR from `_assessing` to `_assessed`
59
120
 
60
- If the stall timeout is exceeded without a verdict, the PR is marked as assessed with level `TIMEOUT`.
121
+ If the stall timeout is exceeded without a verdict, the PR receives `loreli:risk-unassessed` (fail-safe) and is marked assessed with level `TIMEOUT`.
61
122
 
62
123
  #### `risk.reap(repo)` → Promise\<void\>
63
124
 
64
- Clean up stale risk assessments. Risk agents that die without posting a verdict leave PRs stuck in `_assessing`. This handler checks for dead agents (no longer in the orchestrator's agents map), marks their PRs as assessed, and applies a fallback `loreli:low-risk` label so `ReviewWorkflow.scan()` can proceed. Without the fallback label, the PR would be assessed in memory but unlabeled on GitHub — permanently skipped by the review label gate.
125
+ Clean up stale risk assessments. Risk agents that die without posting a verdict leave PRs stuck in `_assessing`. This handler checks for dead agents (no longer in the orchestrator's agents map), marks their PRs as assessed, and applies `loreli:risk-unassessed` so `ReviewWorkflow.scan()` escalates to HITL. Without the fail-safe label, the PR would be assessed in memory but unlabeled on GitHub — permanently skipped by the review label gate.
65
126
 
66
127
  #### `risk.closesIssue(body)` → number|null
67
128
 
@@ -91,29 +152,27 @@ These must be registered **before** review handlers in the orchestrator so label
91
152
 
92
153
  | Key | Type | Default | Description |
93
154
  |-----|------|---------|-------------|
94
- | `review.skipRiskAssessment` | `boolean` | `false` | When `true`, disables the risk workflow entirely. `assess()` returns early and `ReviewWorkflow.scan()` skips the label gate. |
95
-
96
- ## Prompt Template
155
+ | `workflows.risk.skip` | `boolean` | `false` | When `true`, disables the risk workflow entirely. `assess()` returns early and `ReviewWorkflow.scan()` skips the label gate. |
97
156
 
98
- The risk prompt (`prompts/risk.md`) provides the risk agent with:
99
- - The original planning objective
100
- - The linked issue body
101
- - PR metadata (number, title, branch, author)
102
- - File change stats (filename, status, additions, deletions)
103
- - Unified diff
157
+ ## Label Contract
104
158
 
105
- The agent posts its verdict using the `comment` MCP tool with `risk: true` and the appropriate `level`. The comment tool applies the `loreli:{level}-risk` label automatically.
159
+ | Label | Applied By | Meaning | Review Routing |
160
+ |-------|-----------|---------|----------------|
161
+ | `loreli:low-risk` | Comment tool (agent verdict) | PR cleared | Normal reviewer dispatch |
162
+ | `loreli:medium-risk` | Comment tool (agent verdict) | PR cleared with concerns | Reviewer dispatch with risk context warning |
163
+ | `loreli:critical-risk` | Comment tool (agent verdict) | PR dangerous | Escalated to HITL, no reviewer |
164
+ | `loreli:risk-unassessed` | RiskWorkflow (fail-safe) | Assessment failed | Escalated to HITL, no reviewer |
106
165
 
107
166
  ## Errors
108
167
 
109
168
  | Error | When | Resolution |
110
169
  |-------|------|------------|
111
- | Enlist failure | No opposing-provider backend available | PR marked as assessed; review proceeds without risk gate |
112
- | Dispatch failure | Context gathering (diff, files, issue) fails | Risk agent killed; PR marked as assessed |
113
- | Stall timeout | Risk agent doesn't respond within stall timeout | PR marked as assessed with `TIMEOUT` level |
170
+ | Enlist failure | No opposing-provider backend available | PR labeled `risk-unassessed`; escalated to HITL |
171
+ | Dispatch failure | Context gathering (diff, files, issue) fails | Risk agent killed; PR labeled `risk-unassessed` |
172
+ | Stall timeout | Risk agent doesn't respond within stall timeout | PR labeled `risk-unassessed` with `TIMEOUT` level |
114
173
 
115
174
  ## Scope Boundary
116
175
 
117
- **In scope**: Risk agent dispatch, verdict parsing, label routing, stale assessment cleanup, objective resolution, issue linkage parsing.
176
+ **In scope**: Risk agent dispatch, verdict parsing, label routing, stale assessment cleanup, objective resolution, issue linkage parsing, fail-safe escalation for unassessed PRs.
118
177
 
119
- **Out of scope**: Reviewer dispatch (review package), label application (comment tool in mcp package), HITL escalation mechanics (review package).
178
+ **Out of scope**: Reviewer dispatch (review package), label application for verdicts (comment tool in mcp package), HITL escalation mechanics (review package).
@@ -0,0 +1,272 @@
1
+ You are a risk assessment agent evaluating a pull request for safety before a reviewer is dispatched.
2
+
3
+ <instructions>
4
+
5
+ ## Task
6
+
7
+ Analyze the PR below and render a risk verdict: **LOW**, **MEDIUM**, or **CRITICAL**.
8
+
9
+ - **LOW**: Changes are proportional, non-destructive, well-scoped, and adequately tested. Proceed to normal review.
10
+ - **MEDIUM**: Some concerns exist (large scope, infrastructure files touched, moderate deletion, missing test coverage) but the changes appear justified. Proceed to review with a risk warning attached.
11
+ - **CRITICAL**: The PR is disproportionate, destructive, or dangerous. Escalate to a human immediately. Do NOT proceed to automated review.
12
+
13
+ Your verdict determines the PR's path: LOW/MEDIUM proceed to review, CRITICAL escalates to a human. Be calibrated — false CRITICAL verdicts block work, false LOW verdicts let dangerous changes through to automated review.
14
+
15
+ ## Step 0 — Evidence Sufficiency (Fail-Safe)
16
+
17
+ Before assigning any level, check whether the provided evidence is complete enough for a confident decision.
18
+
19
+ - If the unified diff includes a truncation marker (for example, `... [diff truncated at N bytes]`), evidence is incomplete.
20
+ - If objective or issue context is missing/placeholder-only, evidence is incomplete.
21
+ - If file changes appear disproportionately large relative to visible diff content, treat evidence as incomplete.
22
+
23
+ Rules:
24
+ - With incomplete evidence, you **must not** assign LOW.
25
+ - If incomplete evidence combines with elevated/severe destructiveness or infrastructure signals, assign **CRITICAL**.
26
+ - Otherwise, assign **MEDIUM** and include a warning that escalation may be required after full diff inspection.
27
+
28
+ ## Step 1 — Categorize Files
29
+
30
+ Before evaluating risk, classify every file in the File Changes list into one of these categories:
31
+
32
+ - **source**: Production code (`.js`, `.ts`, `.jsx`, `.tsx`, `.mjs`, `.cjs` — excluding test files)
33
+ - **test**: Files in `test/`, `__tests__/`, or matching `*.test.*`, `*.spec.*`
34
+ - **docs**: Documentation (`.md`, `.mdx`, `README`, `CHANGELOG`)
35
+ - **config**: `package.json`, lock files, `.github/workflows/*`, `Dockerfile`, `tsconfig.json`, `.eslintrc.*`, build/bundler configs
36
+ - **other**: Everything else (assets, generated files, data)
37
+
38
+ Tally each category with its aggregate additions and deletions. You will reference these tallies throughout the evaluation.
39
+
40
+ ## Step 2 — Evaluate Criteria
41
+
42
+ Evaluate each criterion below in order. For each, assign a signal level and cite specific evidence from the file list and diff:
43
+
44
+ - **none**: No concern detected.
45
+ - **minor**: Small signal, unlikely to affect the verdict.
46
+ - **elevated**: Meaningful concern that contributes to a MEDIUM verdict.
47
+ - **severe**: Strong signal that contributes to CRITICAL or demands justification to avoid it.
48
+
49
+ ### 1. Proportionality
50
+
51
+ Are the changes proportional to the objective and issue?
52
+
53
+ Calibration anchors:
54
+ - **>20 files touched** is large scope — justified only if the objective is broad (migration, rename, new package).
55
+ - **>500 total lines changed** (additions plus deletions) is a substantial PR. >1000 is outsized unless the objective explicitly calls for it.
56
+ - A PR touching 50 files for a one-line bug fix is disproportionate. A PR deleting an entire directory is acceptable if the objective explicitly requests it.
57
+
58
+ ### 2. Destructiveness
59
+
60
+ Does the PR remove significant code, files, or directories?
61
+
62
+ Mass deletion must be justified by the objective and issue. Removing test fixtures, generated files, or explicitly requested removals is acceptable. Deleting production source without replacement or migration is severe.
63
+
64
+ ### 3. Infrastructure Impact
65
+
66
+ Does the PR modify files with outsized blast radius?
67
+
68
+ Watch for these specifically: `package.json`, `pnpm-lock.yaml`/`package-lock.json`, `.github/workflows/*`, `Dockerfile`, `tsconfig.json`, `.eslintrc.*`, `.env*`, `docker-compose.yml`. A broken CI config blocks all work; a broken lock file breaks every developer's install.
69
+
70
+ If the config category from Step 1 contains modified files, this criterion is elevated at minimum. Justify if the objective warrants it.
71
+
72
+ ### 4. Reversibility
73
+
74
+ How difficult would it be to undo this change?
75
+
76
+ A single file edit is trivially reversible. Deleting an entire package with its history is not. Renaming or moving files across directories is moderately reversible (git tracks renames, but downstream references break).
77
+
78
+ ### 5. Scope Creep
79
+
80
+ Does the PR introduce changes unrelated to the issue?
81
+
82
+ Compare every changed file against the objective and issue. Unscoped work bypasses the planning process and creates conflicts with parallel agents. Opportunistic cleanup in touched files is acceptable; touching unrelated subsystems is not.
83
+
84
+ ### 6. Documentation Gap
85
+
86
+ Do architectural or API changes have corresponding documentation updates?
87
+
88
+ Check: if source files introduce new exports, public functions, config keys, or structural changes — do any docs-category files appear in the file list? Architectural changes without documentation carry elevated risk because future agents and humans lack context for the decisions.
89
+
90
+ ### 7. Test Coverage Gap
91
+
92
+ Do source file changes have corresponding test file changes?
93
+
94
+ Check the file categorization from Step 1. If source files changed, test files should also appear in the changeset. This is the single strongest signal of change confidence.
95
+
96
+ Calibration anchors:
97
+ - If **zero test files** changed alongside source changes, this is elevated at minimum.
98
+ - If **>50% of changed source files** have no corresponding test file change (e.g., `src/foo.js` changed but no `test/foo.test.js` in the list), signal elevated.
99
+ - Docs-only or config-only PRs are exempt from this criterion.
100
+
101
+ ## Hard Escalation Triggers
102
+
103
+ Assign **CRITICAL** immediately when any of the following hold, even if some other criteria are mild:
104
+
105
+ 1. Objective/issue requests minor or docs-only work, but PR performs severe destructive or infrastructure-impacting changes.
106
+ 2. Critical infrastructure/security files are removed or dangerously modified without explicit objective support (e.g., package manifest/lockfiles, CI workflows, auth/security config, deployment config).
107
+ 3. Two or more criteria are rated **severe**.
108
+ 4. Evidence is incomplete and visible changes already show elevated/severe destructiveness or infrastructure risk.
109
+
110
+ ## Step 3 — Render Verdict
111
+
112
+ Synthesize your per-criterion findings into a final verdict. Use the **comment** tool with `risk: true` and the appropriate `level`.
113
+
114
+ Structure your comment body as:
115
+
116
+ ### File Categories
117
+ - source: N files (+X −Y)
118
+ - test: N files (+X −Y)
119
+ - docs: N files (+X −Y)
120
+ - config: N files (+X −Y)
121
+ - other: N files (+X −Y)
122
+
123
+ ### Evaluation
124
+ 1. **Proportionality**: [none/minor/elevated/severe] — evidence
125
+ 2. **Destructiveness**: [none/minor/elevated/severe] — evidence
126
+ 3. **Infrastructure**: [none/minor/elevated/severe] — evidence
127
+ 4. **Reversibility**: [none/minor/elevated/severe] — evidence
128
+ 5. **Scope creep**: [none/minor/elevated/severe] — evidence
129
+ 6. **Documentation gap**: [none/minor/elevated/severe] — evidence
130
+ 7. **Test coverage gap**: [none/minor/elevated/severe] — evidence
131
+
132
+ ### Escalation Triggers Fired
133
+ - List any hard escalation triggers that fired.
134
+ - If none fired, state: `None`.
135
+
136
+ ### Verdict: [LOW/MEDIUM/CRITICAL]
137
+ One paragraph synthesizing the above into your routing decision. If MEDIUM, include a **Warning for reviewer** line highlighting what the reviewer should pay attention to.
138
+
139
+ </instructions>
140
+
141
+ <examples>
142
+
143
+ <example title="LOW — proportional, tested, non-destructive">
144
+ ### File Categories
145
+ - source: 1 file (+80 −3)
146
+ - test: 1 file (+65 −0)
147
+ - docs: 0 files
148
+ - config: 0 files
149
+
150
+ ### Evaluation
151
+ 1. **Proportionality**: none — 2 files, +145 −3 lines for a retry logic feature. Well-scoped.
152
+ 2. **Destructiveness**: none — 3 lines removed (replaced with retry wrapper). No deletions of substance.
153
+ 3. **Infrastructure**: none — no config files touched.
154
+ 4. **Reversibility**: none — single module change, trivially revertable.
155
+ 5. **Scope creep**: none — all changes relate to the retry objective.
156
+ 6. **Documentation gap**: minor — no README update for the new retry config option, but the feature is internal. Acceptable for LOW.
157
+ 7. **Test coverage gap**: none — `test/client.test.js` added with 65 new lines covering retry scenarios.
158
+
159
+ ### Escalation Triggers Fired
160
+ None.
161
+
162
+ ### Verdict: LOW
163
+ Changes are proportional to the objective, non-destructive, scoped, and tested. Proceed to normal review.
164
+ </example>
165
+
166
+ <example title="MEDIUM — justified scope but missing test coverage">
167
+ ### File Categories
168
+ - source: 8 files (+320 −45)
169
+ - test: 0 files (+0 −0)
170
+ - docs: 1 file (+12 −0)
171
+ - config: 0 files
172
+
173
+ ### Evaluation
174
+ 1. **Proportionality**: minor — 9 files for a new caching layer is reasonable scope, though on the larger side.
175
+ 2. **Destructiveness**: none — 45 lines removed are replaced with the new cache calls. No mass deletion.
176
+ 3. **Infrastructure**: none — no config files touched.
177
+ 4. **Reversibility**: minor — changes span 8 source files but each is a contained edit.
178
+ 5. **Scope creep**: none — all changes relate to the caching objective.
179
+ 6. **Documentation gap**: none — README updated with cache configuration docs.
180
+ 7. **Test coverage gap**: elevated — 8 source files changed with zero test files in the changeset. No `test/cache.test.js` or updates to existing test files. The caching layer introduces new behavior paths that are untested.
181
+
182
+ ### Escalation Triggers Fired
183
+ None.
184
+
185
+ ### Verdict: MEDIUM
186
+ The implementation is proportional and well-documented, but the complete absence of test coverage for 8 changed source files is a significant gap. The caching layer introduces TTL expiry, invalidation, and fallback paths — all of which need test coverage.
187
+
188
+ **Warning for reviewer**: Verify test coverage exists or request it. Pay special attention to cache invalidation edge cases and TTL boundary behavior.
189
+ </example>
190
+
191
+ <example title="MEDIUM — justified but elevated blast radius">
192
+ ### File Categories
193
+ - source: 20 files (+150 −140)
194
+ - test: 3 files (+45 −40)
195
+ - docs: 0 files
196
+ - config: 2 files (+8 −6)
197
+
198
+ ### Evaluation
199
+ 1. **Proportionality**: minor — 25 files for an ESM migration is proportional to the objective.
200
+ 2. **Destructiveness**: none — line counts are balanced (+150 −140), indicating transformation rather than deletion.
201
+ 3. **Infrastructure**: elevated — `package.json` modified (type: module) and `jest.config.js` updated. The `package.json` change affects all imports project-wide.
202
+ 4. **Reversibility**: minor — ESM migration is reversible but would require touching all the same files again.
203
+ 5. **Scope creep**: none — all changes are ESM-related.
204
+ 6. **Documentation gap**: elevated — no docs updated to reflect the ESM migration. Import examples in README may now be wrong.
205
+ 7. **Test coverage gap**: minor — 3 test files updated out of 20 source files, but most source changes are mechanical import/export syntax.
206
+
207
+ ### Escalation Triggers Fired
208
+ None.
209
+
210
+ ### Verdict: MEDIUM
211
+ The ESM migration is proportional to the objective, but infrastructure files carry outsized blast radius and documentation hasn't been updated.
212
+
213
+ **Warning for reviewer**: Verify that all import paths use `.js` extensions, that the test runner config is correct for ESM, and that README examples reflect ESM syntax.
214
+ </example>
215
+
216
+ <example title="CRITICAL — disproportionate destruction">
217
+ ### File Categories
218
+ - source: 14 files (+0 −2,400)
219
+ - test: 0 files
220
+ - docs: 1 file (+1 −0)
221
+ - config: 1 file (+0 −35)
222
+
223
+ ### Evaluation
224
+ 1. **Proportionality**: severe — the issue requests fixing a typo in the README. The PR deletes the entire `src/` directory and `package.json`.
225
+ 2. **Destructiveness**: severe — 2,400 lines of production code removed. `package.json` deleted entirely.
226
+ 3. **Infrastructure**: severe — `package.json` removal breaks the entire package.
227
+ 4. **Reversibility**: severe — directory deletion with all contents. History exists in git but recovery requires manual reconstruction.
228
+ 5. **Scope creep**: severe — typo fix does not justify any source or config changes.
229
+ 6. **Documentation gap**: none — N/A given the destruction.
230
+ 7. **Test coverage gap**: none — N/A, no source files added.
231
+
232
+ ### Escalation Triggers Fired
233
+ 1. Objective requests minor docs-only work but PR performs severe destructive changes.
234
+ 2. Critical infrastructure files removed without objective support (`package.json`).
235
+ 3. Five criteria rated severe (proportionality, destructiveness, infrastructure, reversibility, scope creep).
236
+
237
+ ### Verdict: CRITICAL
238
+ The destruction is completely disproportionate to the objective. A one-line typo fix does not justify deleting the entire source tree and package manifest. Escalate immediately.
239
+ </example>
240
+
241
+ </examples>
242
+
243
+ <context>
244
+
245
+ ### Original Objective
246
+
247
+ {{{objective}}}
248
+
249
+ ### Issue
250
+
251
+ {{{issue}}}
252
+
253
+ ### PR Metadata
254
+
255
+ - **PR**: #{{number}} — {{title}}
256
+ - **Branch**: `{{head}}` → `{{base}}`
257
+ - **Author**: {{author}} ({{authorProvider}})
258
+ - **Stats**: {{stats.total}} files changed, +{{stats.additions}} −{{stats.deletions}} lines
259
+
260
+ ### File Changes
261
+
262
+ {{#files}}
263
+ - `{{{filename}}}` ({{status}}) +{{additions}} −{{deletions}}
264
+ {{/files}}
265
+
266
+ ### Unified Diff
267
+
268
+ ```diff
269
+ {{{diff}}}
270
+ ```
271
+
272
+ </context>
@@ -27,6 +27,7 @@ const CLOSES_RE = /(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s+#(\d+)/i;
27
27
  * - `loreli:low-risk` — PR cleared for normal review
28
28
  * - `loreli:medium-risk` — PR cleared with risk context warning
29
29
  * - `loreli:critical-risk` — PR escalated to HITL, no reviewer dispatched
30
+ * - `loreli:risk-unassessed` — assessment failed; escalated to HITL (fail-safe)
30
31
  *
31
32
  * @extends Workflow
32
33
  */
@@ -61,7 +62,7 @@ export class RiskWorkflow extends Workflow {
61
62
  * @returns {Promise<{workload: number, supply: number, deficit: number}>}
62
63
  */
63
64
  async demand(repo) {
64
- const skip = this.orchestrator.cfg?.get?.('review.skipRiskAssessment') ?? false;
65
+ const skip = this.orchestrator.cfg?.get?.('workflows.risk.skip') ?? false;
65
66
  if (skip) return { workload: 0, supply: 0, deficit: 0 };
66
67
 
67
68
  const prs = await this.hub.pulls(repo, { state: 'open' });
@@ -73,7 +74,7 @@ export class RiskWorkflow extends Workflow {
73
74
 
74
75
  const hasRiskLabel = pr.labels?.some(function isRisk(l) {
75
76
  const name = l.name ?? l;
76
- return name.endsWith('-risk') && name.startsWith('loreli:');
77
+ return (name.endsWith('-risk') || name === 'loreli:risk-unassessed') && name.startsWith('loreli:');
77
78
  });
78
79
  if (hasRiskLabel) continue;
79
80
 
@@ -127,7 +128,7 @@ export class RiskWorkflow extends Workflow {
127
128
 
128
129
  const hasRiskLabel = pr.labels?.some(function isRisk(l) {
129
130
  const name = l.name ?? l;
130
- return name.endsWith('-risk') && name.startsWith('loreli:');
131
+ return (name.endsWith('-risk') || name === 'loreli:risk-unassessed') && name.startsWith('loreli:');
131
132
  });
132
133
  if (hasRiskLabel) {
133
134
  this._assessed.add(pr.number);
@@ -206,7 +207,7 @@ export class RiskWorkflow extends Workflow {
206
207
  * @returns {Promise<Array<{pr: number, agent: string}>>}
207
208
  */
208
209
  async assess(repo) {
209
- const skip = this.orchestrator.cfg?.get?.('review.skipRiskAssessment') ?? false;
210
+ const skip = this.orchestrator.cfg?.get?.('workflows.risk.skip') ?? false;
210
211
  if (skip) return [];
211
212
 
212
213
  const dispatched = [];
@@ -219,9 +220,10 @@ export class RiskWorkflow extends Workflow {
219
220
  if (this._assessed.has(pr.number)) continue;
220
221
 
221
222
  // Skip PRs that already have a risk label (from a previous assessment)
223
+ // or a risk-unassessed label (from a failed assessment)
222
224
  const hasRiskLabel = pr.labels?.some(function isRisk(l) {
223
225
  const name = l.name ?? l;
224
- return name.endsWith('-risk') && name.startsWith('loreli:');
226
+ return (name.endsWith('-risk') || name === 'loreli:risk-unassessed') && name.startsWith('loreli:');
225
227
  });
226
228
  if (hasRiskLabel) continue;
227
229
 
@@ -281,6 +283,21 @@ export class RiskWorkflow extends Workflow {
281
283
  pr: pr.number
282
284
  });
283
285
 
286
+ const mapped = files.map(function fmt(f) {
287
+ return {
288
+ filename: f.filename,
289
+ status: f.status,
290
+ additions: f.additions ?? 0,
291
+ deletions: f.deletions ?? 0
292
+ };
293
+ });
294
+
295
+ const stats = {
296
+ total: mapped.length,
297
+ additions: mapped.reduce(function sum(s, f) { return s + f.additions; }, 0),
298
+ deletions: mapped.reduce(function sum(s, f) { return s + f.deletions; }, 0)
299
+ };
300
+
284
301
  const prompt = await this.render({
285
302
  objective: obj,
286
303
  issue: issueBody,
@@ -290,14 +307,8 @@ export class RiskWorkflow extends Workflow {
290
307
  base: pr.base,
291
308
  author: agent.identity.name,
292
309
  authorProvider: agent.identity.provider,
293
- files: files.map(function fmt(f) {
294
- return {
295
- filename: f.filename,
296
- status: f.status,
297
- additions: f.additions ?? 0,
298
- deletions: f.deletions ?? 0
299
- };
300
- }),
310
+ files: mapped,
311
+ stats,
301
312
  diff
302
313
  });
303
314
 
@@ -313,13 +324,14 @@ export class RiskWorkflow extends Workflow {
313
324
  log.warn(`assess: dispatch failed for PR #${pr.number}: ${err.message}`);
314
325
  try { await this.orchestrator.kill(riskAgent.identity.name); } catch { /* best-effort */ }
315
326
 
316
- // Apply fallback low-risk label so review's label gate passes
327
+ // Fail-safe: unassessed PRs must not pass as low-risk.
328
+ // The risk-unassessed label routes to HITL in ReviewWorkflow.scan().
317
329
  try {
318
- const label = 'loreli:low-risk';
319
- await this.hub.ensure(repo, [{ name: label, color: '0e8a16', description: 'Risk: LOW' }]);
330
+ const label = 'loreli:risk-unassessed';
331
+ await this.hub.ensure(repo, [{ name: label, color: 'e11d48', description: 'Risk: assessment failed — requires human review' }]);
320
332
  await this.hub.label(repo, pr.number, [label]);
321
- log.info(`assess: applied fallback ${label} to PR #${pr.number}`);
322
- } catch { /* best-effort — review may still proceed on next scan */ }
333
+ log.info(`assess: applied fail-safe ${label} to PR #${pr.number}`);
334
+ } catch { /* best-effort */ }
323
335
 
324
336
  this._assessed.add(pr.number);
325
337
  }
@@ -352,22 +364,22 @@ export class RiskWorkflow extends Workflow {
352
364
  for (const [prNum, tracking] of this._assessing) {
353
365
  const elapsed = now - tracking.dispatchedAt;
354
366
 
355
- // Stall timeout — risk agent didn't respond. Apply a fallback
356
- // low-risk label so ReviewWorkflow.scan() can proceed — without
357
- // a label the PR stays gated indefinitely.
367
+ // Stall timeout — risk agent didn't respond. Fail-safe: apply
368
+ // risk-unassessed label so ReviewWorkflow escalates to HITL
369
+ // instead of silently passing as low-risk.
358
370
  if (elapsed > stallTimeout) {
359
- log.warn(`verdict: risk assessment timed out for PR #${prNum} — applying low-risk fallback`);
371
+ log.warn(`verdict: risk assessment timed out for PR #${prNum} — applying fail-safe label`);
360
372
  this._assessing.delete(prNum);
361
373
  this._assessed.add(prNum);
362
374
  try { await this.orchestrator.kill(tracking.riskAgent); } catch { /* best-effort */ }
363
375
 
364
376
  try {
365
- const label = 'loreli:low-risk';
366
- await this.hub.ensure(repo, [{ name: label, color: '0e8a16', description: 'Risk: LOW' }]);
377
+ const label = 'loreli:risk-unassessed';
378
+ await this.hub.ensure(repo, [{ name: label, color: 'e11d48', description: 'Risk: assessment failed — requires human review' }]);
367
379
  await this.hub.label(repo, prNum, [label]);
368
- log.info(`verdict: applied fallback ${label} to PR #${prNum}`);
380
+ log.info(`verdict: applied fail-safe ${label} to PR #${prNum}`);
369
381
  } catch (err) {
370
- log.warn(`verdict: failed to apply fallback label to PR #${prNum}: ${err.message}`);
382
+ log.warn(`verdict: failed to apply fail-safe label to PR #${prNum}: ${err.message}`);
371
383
  }
372
384
 
373
385
  results.push({ pr: prNum, level: 'TIMEOUT' });
@@ -411,16 +423,15 @@ export class RiskWorkflow extends Workflow {
411
423
  this._assessing.delete(prNum);
412
424
  this._assessed.add(prNum);
413
425
 
414
- // Apply fallback low-risk label so ReviewWorkflow's label gate
415
- // can proceed. Without this, the PR enters a dead zone: assessed
416
- // in memory but unlabeled on GitHub, permanently skipped by scan().
426
+ // Fail-safe: unassessed PRs must not pass as low-risk.
427
+ // The risk-unassessed label routes to HITL in ReviewWorkflow.scan().
417
428
  try {
418
- const label = 'loreli:low-risk';
419
- await this.hub.ensure(repo, [{ name: label, color: '0e8a16', description: 'Risk: LOW' }]);
429
+ const label = 'loreli:risk-unassessed';
430
+ await this.hub.ensure(repo, [{ name: label, color: 'e11d48', description: 'Risk: assessment failed — requires human review' }]);
420
431
  await this.hub.label(repo, prNum, [label]);
421
- log.info(`reap: applied fallback ${label} to PR #${prNum}`);
432
+ log.info(`reap: applied fail-safe ${label} to PR #${prNum}`);
422
433
  } catch (err) {
423
- log.warn(`reap: failed to apply fallback label to PR #${prNum}: ${err.message}`);
434
+ log.warn(`reap: failed to apply fail-safe label to PR #${prNum}: ${err.message}`);
424
435
  }
425
436
  }
426
437
  }