llm-mock-server 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/.claude/skills/desloppify/SKILL.md +308 -0
  2. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/canonical_import_20260315_000801.json +242 -0
  3. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/canonical_import_20260315_000905.json +248 -0
  4. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/canonical_import_20260315_000917.json +248 -0
  5. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/canonical_import_20260315_000950.json +311 -0
  6. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/claude_launch_prompt.md +17 -0
  7. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/review_result.json +255 -0
  8. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/review_result.template.json +22 -0
  9. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/reviewer_instructions.md +20 -0
  10. package/.desloppify/external_review_sessions/ext_20260315_000339_a6cdc3e6/session.json +20 -0
  11. package/.desloppify/query.json +284 -0
  12. package/.desloppify/review_packet_blind.json +1303 -0
  13. package/.desloppify/review_packets/holistic_packet_20260315_000339.json +1471 -0
  14. package/.desloppify/state-typescript.json +5114 -0
  15. package/.desloppify/state-typescript.json.bak +5108 -0
  16. package/dist/cli.js +5 -2
  17. package/dist/cli.js.map +1 -1
  18. package/dist/formats/anthropic/index.js +1 -1
  19. package/dist/formats/anthropic/index.js.map +1 -1
  20. package/dist/formats/anthropic/parse.d.ts +1 -1
  21. package/dist/formats/anthropic/parse.d.ts.map +1 -1
  22. package/dist/formats/anthropic/parse.js +1 -1
  23. package/dist/formats/anthropic/parse.js.map +1 -1
  24. package/dist/formats/anthropic/serialize.d.ts +2 -2
  25. package/dist/formats/anthropic/serialize.d.ts.map +1 -1
  26. package/dist/formats/anthropic/serialize.js +6 -3
  27. package/dist/formats/anthropic/serialize.js.map +1 -1
  28. package/dist/formats/openai/index.js +1 -1
  29. package/dist/formats/openai/index.js.map +1 -1
  30. package/dist/formats/openai/parse.d.ts +1 -1
  31. package/dist/formats/openai/parse.d.ts.map +1 -1
  32. package/dist/formats/openai/parse.js +1 -1
  33. package/dist/formats/openai/parse.js.map +1 -1
  34. package/dist/formats/openai/serialize.d.ts +2 -2
  35. package/dist/formats/openai/serialize.d.ts.map +1 -1
  36. package/dist/formats/openai/serialize.js +12 -15
  37. package/dist/formats/openai/serialize.js.map +1 -1
  38. package/dist/formats/request-helpers.d.ts +13 -0
  39. package/dist/formats/request-helpers.d.ts.map +1 -0
  40. package/dist/formats/request-helpers.js +28 -0
  41. package/dist/formats/request-helpers.js.map +1 -0
  42. package/dist/formats/responses/index.js +1 -1
  43. package/dist/formats/responses/index.js.map +1 -1
  44. package/dist/formats/responses/parse.d.ts +1 -1
  45. package/dist/formats/responses/parse.d.ts.map +1 -1
  46. package/dist/formats/responses/parse.js +1 -1
  47. package/dist/formats/responses/parse.js.map +1 -1
  48. package/dist/formats/responses/schema.d.ts +1 -20
  49. package/dist/formats/responses/schema.d.ts.map +1 -1
  50. package/dist/formats/responses/schema.js.map +1 -1
  51. package/dist/formats/responses/serialize.d.ts +2 -2
  52. package/dist/formats/responses/serialize.d.ts.map +1 -1
  53. package/dist/formats/responses/serialize.js +6 -3
  54. package/dist/formats/responses/serialize.js.map +1 -1
  55. package/dist/formats/serialize-helpers.d.ts +14 -0
  56. package/dist/formats/serialize-helpers.d.ts.map +1 -0
  57. package/dist/formats/serialize-helpers.js +25 -0
  58. package/dist/formats/serialize-helpers.js.map +1 -0
  59. package/dist/formats/types.d.ts +3 -3
  60. package/dist/formats/types.d.ts.map +1 -1
  61. package/dist/loader.d.ts +3 -2
  62. package/dist/loader.d.ts.map +1 -1
  63. package/dist/loader.js +6 -9
  64. package/dist/loader.js.map +1 -1
  65. package/dist/logger.d.ts +1 -0
  66. package/dist/logger.d.ts.map +1 -1
  67. package/dist/logger.js +17 -23
  68. package/dist/logger.js.map +1 -1
  69. package/dist/mock-server.d.ts.map +1 -1
  70. package/dist/mock-server.js +8 -15
  71. package/dist/mock-server.js.map +1 -1
  72. package/dist/route-handler.d.ts +2 -1
  73. package/dist/route-handler.d.ts.map +1 -1
  74. package/dist/rule-engine.d.ts +12 -1
  75. package/dist/rule-engine.d.ts.map +1 -1
  76. package/dist/rule-engine.js +14 -0
  77. package/dist/rule-engine.js.map +1 -1
  78. package/dist/types/reply.d.ts +6 -10
  79. package/dist/types/reply.d.ts.map +1 -1
  80. package/dist/types/request.d.ts +7 -11
  81. package/dist/types/request.d.ts.map +1 -1
  82. package/dist/types/rule.d.ts +3 -10
  83. package/dist/types/rule.d.ts.map +1 -1
  84. package/dist/types.d.ts +3 -1
  85. package/dist/types.d.ts.map +1 -1
  86. package/package.json +1 -1
  87. package/scorecard.png +0 -0
  88. package/src/cli.ts +6 -2
  89. package/src/formats/anthropic/index.ts +1 -1
  90. package/src/formats/anthropic/parse.ts +1 -1
  91. package/src/formats/anthropic/serialize.ts +9 -5
  92. package/src/formats/openai/index.ts +1 -1
  93. package/src/formats/openai/parse.ts +1 -1
  94. package/src/formats/openai/serialize.ts +15 -17
  95. package/src/formats/{parse-helpers.ts → request-helpers.ts} +2 -31
  96. package/src/formats/responses/index.ts +1 -1
  97. package/src/formats/responses/parse.ts +1 -1
  98. package/src/formats/responses/schema.ts +1 -3
  99. package/src/formats/responses/serialize.ts +9 -5
  100. package/src/formats/serialize-helpers.ts +30 -0
  101. package/src/formats/types.ts +3 -3
  102. package/src/loader.ts +7 -11
  103. package/src/logger.ts +19 -25
  104. package/src/mock-server.ts +10 -14
  105. package/src/route-handler.ts +1 -1
  106. package/src/rule-engine.ts +23 -1
  107. package/src/types/reply.ts +6 -10
  108. package/src/types/request.ts +7 -11
  109. package/src/types/rule.ts +3 -10
  110. package/src/types.ts +3 -5
  111. package/test/formats/anthropic.test.ts +4 -4
  112. package/test/formats/parse-helpers.test.ts +275 -0
  113. package/test/formats/responses.test.ts +4 -4
  114. package/test/helpers/make-req.ts +18 -0
  115. package/test/history.test.ts +348 -0
  116. package/test/loader.test.ts +11 -27
  117. package/test/logger.test.ts +294 -0
  118. package/test/mock-server.test.ts +1 -1
  119. package/test/rule-engine.test.ts +8 -22
  120. package/src/types/index.ts +0 -4
@@ -0,0 +1,308 @@
1
+ ---
2
+ name: desloppify
3
+ description: >
4
+ Codebase health scanner and technical debt tracker. Use when the user asks
5
+ about code quality, technical debt, dead code, large files, god classes,
6
+ duplicate functions, code smells, naming issues, import cycles, or coupling
7
+ problems. Also use when asked for a health score, what to fix next, or to
8
+ create a cleanup plan. Supports 29 languages.
9
+ allowed-tools: Bash(desloppify *)
10
+ ---
11
+
12
+ <!-- desloppify-begin -->
13
+ <!-- desloppify-skill-version: 5 -->
14
+
15
+ # Desloppify
16
+
17
+ ## 1. Your Job
18
+
19
+ Maximise the **strict score** honestly. Your main cycle: **scan → plan → execute → rescan**. Follow the scan output's **INSTRUCTIONS FOR AGENTS** — don't substitute your own analysis.
20
+
21
+ **Don't be lazy.** Do large refactors and small detailed fixes with equal energy. If it takes touching 20 files, touch 20 files. If it's a one-line change, make it. No task is too big or too small — fix things properly, not minimally.
22
+
23
+ ## 2. The Workflow
24
+
25
+ Three phases, repeated as a cycle.
26
+
27
+ ### Phase 1: Scan and review — understand the codebase
28
+
29
+ ```bash
30
+ desloppify scan --path . # analyse the codebase
31
+ desloppify status # check scores — are we at target?
32
+ ```
33
+
34
+ The scan will tell you if subjective dimensions need review. Follow its instructions. To trigger a review manually:
35
+ ```bash
36
+ desloppify review --prepare # then follow your runner's review workflow
37
+ ```
38
+
39
+ ### Phase 2: Plan — decide what to work on
40
+
41
+ After reviews, triage stages and plan creation appear in the execution queue surfaced by `next`. Complete them in order — `next` tells you what each stage expects in the `--report`:
42
+ ```bash
43
+ desloppify next # shows the next execution workflow step
44
+ desloppify plan triage --stage observe --report "themes and root causes..."
45
+ desloppify plan triage --stage reflect --report "comparison against completed work..."
46
+ desloppify plan triage --stage organize --report "summary of priorities..."
47
+ desloppify plan triage --complete --strategy "execution plan..."
48
+ ```
49
+
50
+ For automated triage: `desloppify plan triage --run-stages --runner codex` (Codex) or `--runner claude` (Claude). Options: `--only-stages`, `--dry-run`, `--stage-timeout-seconds`.
51
+
52
+ Then shape the queue. **The plan shapes everything `next` gives you** — `next` is the execution queue, not the full backlog. Don't skip this step.
53
+
54
+ ```bash
55
+ desloppify plan # see the living plan details
56
+ desloppify plan queue # compact execution queue view
57
+ desloppify plan reorder <pat> top # reorder — what unblocks the most?
58
+ desloppify plan cluster create <name> # group related issues to batch-fix
59
+ desloppify plan focus <cluster> # scope next to one cluster
60
+ desloppify plan skip <pat> # defer — hide from next
61
+ ```
62
+
63
+ ### Phase 3: Execute — grind the queue to completion
64
+
65
+ Trust the plan and execute. Don't rescan mid-queue — finish the queue first.
66
+
67
+ **Branch first.** Create a dedicated branch — never commit health work directly to main:
68
+ ```bash
69
+ git checkout -b desloppify/code-health # or desloppify/<focus-area>
70
+ desloppify config set commit_pr 42 # link a PR for auto-updated descriptions
71
+ ```
72
+
73
+ **The loop:**
74
+ ```bash
75
+ # 1. Get the next item from the execution queue
76
+ desloppify next
77
+
78
+ # 2. Fix the issue in code
79
+
80
+ # 3. Resolve it (next shows the exact command including required attestation)
81
+
82
+ # 4. When you have a logical batch, commit and record
83
+ git add <files> && git commit -m "desloppify: fix 3 deferred_import findings"
84
+ desloppify plan commit-log record # moves findings uncommitted → committed, updates PR
85
+
86
+ # 5. Push periodically
87
+ git push -u origin desloppify/code-health
88
+
89
+ # 6. Repeat until the queue is empty
90
+ ```
91
+
92
+ Score may temporarily drop after fixes — cascade effects are normal, so keep going.
93
+ If `next` suggests an auto-fixer, run `desloppify autofix <fixer> --dry-run` to preview, then apply.
94
+
95
+ **When the queue is clear, go back to Phase 1.** New issues will surface, cascades will have resolved, priorities will have shifted. This is the cycle.
96
+
97
+ ## 3. Reference
98
+
99
+ ### Key concepts
100
+
101
+ - **Tiers**: T1 auto-fix → T2 quick manual → T3 judgment call → T4 major refactor.
102
+ - **Auto-clusters**: related findings are auto-grouped in `next`. Drill in with `next --cluster <name>`.
103
+ - **Zones**: production/script (scored), test/config/generated/vendor (not scored). Fix a misclassified zone with `zone set`.
104
+ - **Wontfix cost**: widens the lenient↔strict gap. Challenge past decisions when the gap grows.
105
+
106
+ ### Scoring
107
+
108
+ Overall score = **25% mechanical** + **75% subjective**.
109
+
110
+ - **Mechanical (25%)**: auto-detected issues — duplication, dead code, smells, unused imports, security. Fixed by changing code and rescanning.
111
+ - **Subjective (75%)**: design quality review — naming, error handling, abstractions, clarity. Starts at **0%** until reviewed. The scan will prompt you when a review is needed.
112
+ - **Strict score** is the north star: wontfix items count as open. The gap between overall and strict is your wontfix debt.
113
+ - **Score types**: overall (lenient), strict (wontfix counts), objective (mechanical only), verified (confirmed fixes only).
114
+
115
+ ### Reviews
116
+
117
+ Four paths to get subjective scores:
118
+
119
+ - **Local runner (Codex)**: `desloppify review --run-batches --runner codex --parallel --scan-after-import` — automated end-to-end.
120
+ - **Local runner (Claude)**: `desloppify review --prepare` → launch parallel subagents → `desloppify review --import merged.json` — see skill doc overlay for details.
121
+ - **Cloud/external**: `desloppify review --external-start --external-runner claude` → follow session template → `--external-submit`.
122
+ - **Manual path**: `desloppify review --prepare` → review per dimension → `desloppify review --import file.json`.
123
+
124
+ - Import first, fix after — import creates tracked state entries for correlation.
125
+ - Target-matching scores trigger auto-reset to prevent gaming. Use the blind-review workflow described in your agent overlay doc (e.g. `docs/CLAUDE.md`, `docs/HERMES.md`).
126
+ - Even moderate scores (60-80) dramatically improve overall health.
127
+ - Stale dimensions auto-surface in `next` — just follow the queue.
128
+
129
+ **Integrity rules:** Score from evidence only — no prior chat context, score history, or target-threshold anchoring. When evidence is mixed, score lower and explain uncertainty. Assess every requested dimension; never drop one.
130
+
131
+ #### Review output format
132
+
133
+ Return machine-readable JSON for review imports. For `--external-submit`, include `session` from the generated template:
134
+
135
+ ```json
136
+ {
137
+ "session": {
138
+ "id": "<session_id_from_template>",
139
+ "token": "<session_token_from_template>"
140
+ },
141
+ "assessments": {
142
+ "<dimension_from_query>": 0
143
+ },
144
+ "findings": [
145
+ {
146
+ "dimension": "<dimension_from_query>",
147
+ "identifier": "short_id",
148
+ "summary": "one-line defect summary",
149
+ "related_files": ["relative/path/to/file.py"],
150
+ "evidence": ["specific code observation"],
151
+ "suggestion": "concrete fix recommendation",
152
+ "confidence": "high|medium|low"
153
+ }
154
+ ]
155
+ }
156
+ ```
157
+
158
+ `findings` MUST match `query.system_prompt` exactly (including `related_files`, `evidence`, and `suggestion`). Use `"findings": []` when no defects found. Import is fail-closed: invalid findings abort unless `--allow-partial` is passed. Assessment scores are auto-applied from trusted internal or cloud session imports. Legacy `--attested-external` remains supported.
159
+
160
+ #### Import paths
161
+
162
+ - Robust session flow (recommended): `desloppify review --external-start --external-runner claude` → use generated prompt/template → run printed `--external-submit` command.
163
+ - Durable scored import (legacy): `desloppify review --import findings.json --attested-external --attest "I validated this review was completed without awareness of overall score and is unbiased."`
164
+ - Findings-only fallback: `desloppify review --import findings.json`
165
+
166
+ #### Reviewer agent prompt
167
+
168
+ Runners that support agent definitions (Cursor, Copilot, Gemini) can create a dedicated reviewer agent. Use this system prompt:
169
+
170
+ ```
171
+ You are a code quality reviewer. You will be given a codebase path, a set of
172
+ dimensions to score, and what each dimension means. Read the code, score each
173
+ dimension 0-100 from evidence only, and return JSON in the required format.
174
+ Do not anchor to target thresholds. When evidence is mixed, score lower and
175
+ explain uncertainty.
176
+ ```
177
+
178
+ See your editor's overlay section below for the agent config format.
179
+
180
+ ### Plan commands
181
+
182
+ ```bash
183
+ desloppify plan reorder <cluster> top # move all cluster members at once
184
+ desloppify plan reorder <a> <b> top # mix clusters + findings in one reorder
185
+ desloppify plan reorder <pat> before -t X # position relative to another item/cluster
186
+ desloppify plan cluster reorder a,b top # reorder multiple clusters as one block
187
+ desloppify plan resolve <pat> # mark complete
188
+ desloppify plan reopen <pat> # reopen
189
+ desloppify backlog # broader non-execution backlog
190
+ ```
191
+
192
+ ### Commit tracking
193
+
194
+ ```bash
195
+ desloppify plan commit-log # see uncommitted + committed status
196
+ desloppify plan commit-log record # record HEAD commit, update PR description
197
+ desloppify plan commit-log record --note "why" # with rationale
198
+ desloppify plan commit-log record --only "smells::*" # record specific findings only
199
+ desloppify plan commit-log history # show commit records
200
+ desloppify plan commit-log pr # preview PR body markdown
201
+ desloppify config set commit_tracking_enabled false # disable guidance
202
+ ```
203
+
204
+ After resolving findings as `fixed`, the tool shows uncommitted work, committed history, and a suggested commit message. After committing externally, run `record` to move findings from uncommitted to committed and auto-update the linked PR description.
205
+
206
+ ### Quick reference
207
+
208
+ ```bash
209
+ desloppify next --count 5 # top 5 execution items
210
+ desloppify next --cluster <name> # drill into a cluster
211
+ desloppify backlog --count 5 # top 5 backlog items outside execution
212
+ desloppify show <pattern> # filter by file/detector/ID
213
+ desloppify show --status open # all open findings
214
+ desloppify plan skip --permanent "<id>" --note "reason" --attest "..." # accept debt
215
+ desloppify exclude <path> # exclude a directory from scanning
216
+ desloppify config show # show all config including excludes
217
+ desloppify scan --path . --reset-subjective # reset subjective baseline to 0
218
+ ```
219
+
220
+ ## 4. Fix Tool Issues Upstream
221
+
222
+ When desloppify itself appears wrong or inconsistent — a bug, a bad detection, a crash, confusing output — **fix it and open a PR**. If you can't confidently fix it, file an issue instead.
223
+
224
+ ### Fix and PR (preferred)
225
+
226
+ Clone the tool repo to a temp directory, make the fix there, and verify it works against the project you're scanning before pushing.
227
+
228
+ ```bash
229
+ git clone https://github.com/peteromallet/desloppify.git /tmp/desloppify-fix
230
+ cd /tmp/desloppify-fix
231
+ git checkout -b fix/<short-description>
232
+ ```
233
+
234
+ Make your changes, then run the test suite and verify the fix against the original project:
235
+
236
+ ```bash
237
+ python -m pytest desloppify/tests/ -q
238
+ python -m desloppify scan --path <project-root> # the project you were scanning
239
+ ```
240
+
241
+ Once it looks good, push and open a PR:
242
+
243
+ ```bash
244
+ git add <files> && git commit -m "fix: <what and why>"
245
+ git push -u origin fix/<short-description>
246
+ gh pr create --title "fix: <short description>" --body "$(cat <<'EOF'
247
+ ## Problem
248
+ <what went wrong — include the command and output>
249
+
250
+ ## Fix
251
+ <what you changed and why>
252
+ EOF
253
+ )"
254
+ ```
255
+
256
+ Clean up after: `rm -rf /tmp/desloppify-fix`
257
+
258
+ ### File an issue (fallback)
259
+
260
+ If the fix is unclear or the change needs discussion, open an issue at `https://github.com/peteromallet/desloppify/issues` with a minimal repro: command, path, expected output, actual output.
261
+
262
+ ## Prerequisite
263
+
264
+ `command -v desloppify >/dev/null 2>&1 && echo "desloppify: installed" || echo "NOT INSTALLED — run: pip install --upgrade git+https://github.com/peteromallet/desloppify.git"`
265
+
266
+ <!-- desloppify-end -->
267
+
268
+ ## Claude Code Overlay
269
+
270
+ Use Claude subagents for subjective scoring work. **Do not use `--runner codex`** — use Claude subagents exclusively.
271
+
272
+ ### Review workflow
273
+
274
+ Run `desloppify review --prepare` first to generate review data, then use Claude subagents:
275
+
276
+ 1. **Prepare**: `desloppify review --prepare` — writes `.desloppify/query.json` and `.desloppify/review_packet_blind.json`.
277
+ 2. **Launch subagents**: Split the review across N parallel Claude subagents (one message, multiple Task calls). Each agent reviews a subset of dimensions.
278
+ 3. **Merge & import**: Merge agent outputs, then `desloppify review --import merged.json --manual-override --attest "Claude subagents ran blind reviews against review_packet_blind.json" --scan-after-import`.
279
+
280
+ #### How to split dimensions across subagents
281
+
282
+ - Read `dimension_prompts` from `query.json` for dimensions with definitions and seed files.
283
+ - Read `.desloppify/review_packet_blind.json` for the blind packet (no score targets, no anchoring data).
284
+ - Group dimensions into 3-4 batches by theme (e.g., architecture, code quality, testing, conventions).
285
+ - Launch one Task agent per batch with `subagent_type: "general-purpose"`. Each agent gets:
286
+ - The codebase path and list of dimensions to score
287
+ - The blind packet path to read
288
+ - Instruction to score from code evidence only, not from targets
289
+ - Each agent writes output to a separate file. Merge assessments (average overlapping dimension scores) and concatenate findings.
290
+
291
+ ### Subagent rules
292
+
293
+ 1. Each agent must be context-isolated — do not pass conversation history or score targets.
294
+ 2. Agents must consume `.desloppify/review_packet_blind.json` (not full `query.json`) to avoid score anchoring.
295
+
296
+ ### Triage workflow
297
+
298
+ Orchestrate triage with per-stage subagents:
299
+ 1. `desloppify plan triage --run-stages --runner claude` — prints orchestrator instructions
300
+ 2. For each stage (observe → reflect → organize → enrich):
301
+ - Get prompt: `desloppify plan triage --stage-prompt <stage>`
302
+ - Launch a subagent with that prompt
303
+ - Verify: `desloppify plan triage` (check dashboard)
304
+ - Confirm: `desloppify plan triage --confirm <stage> --attestation "..."`
305
+ 3. Complete: `desloppify plan triage --complete --strategy "..." --attestation "..."`
306
+
307
+ <!-- desloppify-overlay: claude -->
308
+ <!-- desloppify-end -->
@@ -0,0 +1,242 @@
1
+ {
2
+ "assessments": {
3
+ "cross_module_architecture": 92.0,
4
+ "convention_outlier": 88.5,
5
+ "error_consistency": 84.0,
6
+ "abstraction_fitness": 93.0,
7
+ "api_surface_coherence": 86.0,
8
+ "authorization_consistency": 100.0,
9
+ "ai_generated_debt": 82.0,
10
+ "incomplete_migration": 97.0,
11
+ "package_organization": 93.0,
12
+ "high_level_elegance": 93.0,
13
+ "mid_level_elegance": 90.0,
14
+ "low_level_elegance": 87.0,
15
+ "design_coherence": 84.0
16
+ },
17
+ "findings": [
18
+ {
19
+ "dimension": "cross_module_architecture",
20
+ "identifier": "types_barrel_reexport_indirection",
21
+ "summary": "src/types.ts is a pure re-export barrel with 19 importers, adding an unnecessary indirection hop",
22
+ "related_files": [
23
+ "src/types.ts",
24
+ "src/types/request.ts",
25
+ "src/types/reply.ts",
26
+ "src/types/rule.ts"
27
+ ],
28
+ "evidence": [
29
+ "src/types.ts has 19 importers (highest in the codebase) and consists of exactly 3 re-export lines with zero logic or value-add",
30
+ "Both src/ modules and src/formats/ modules import from src/types.ts rather than from the actual definition files in src/types/",
31
+ "This creates a hub module that any type change routes through, inflating apparent coupling"
32
+ ],
33
+ "suggestion": "This is borderline since barrel exports are common in TypeScript libraries, but the dual-layer (src/types.ts re-exporting from src/types/*.ts, plus src/index.ts re-exporting again) adds navigational friction. Consider having internal modules import directly from src/types/request.ts, src/types/reply.ts, src/types/rule.ts and reserving src/types.ts only for the public API surface.",
34
+ "confidence": "low"
35
+ },
36
+ {
37
+ "dimension": "convention_outlier",
38
+ "identifier": "makeReq_helper_inconsistent_fields",
39
+ "summary": "Test helper makeReq() has inconsistent field sets across test files -- some include headers/path, others do not",
40
+ "related_files": [
41
+ "test/rule-engine.test.ts",
42
+ "test/history.test.ts",
43
+ "test/loader.test.ts"
44
+ ],
45
+ "evidence": [
46
+ "test/rule-engine.test.ts makeReq() omits headers and path fields entirely",
47
+ "test/history.test.ts and test/loader.test.ts makeReq() includes headers: {} and path: '/v1/chat/completions'",
48
+ "This means rule-engine tests create MockRequest objects that are structurally incomplete versus the interface defined in src/types/request.ts which has required headers and path fields"
49
+ ],
50
+ "suggestion": "Extract a shared test factory into a test/helpers/ module (e.g., test/helpers/make-req.ts) that always provides all required MockRequest fields. Import it in all test files to ensure consistency and prevent type drift.",
51
+ "confidence": "medium"
52
+ },
53
+ {
54
+ "dimension": "error_consistency",
55
+ "identifier": "resolver_error_swallowed_silently",
56
+ "summary": "When a resolver function throws, the error is logged but the server silently returns the fallback reply with 200 OK",
57
+ "related_files": [
58
+ "src/route-handler.ts"
59
+ ],
60
+ "evidence": [
61
+ "In resolveReply() at lines 34-37: catch (err) logs the error then returns the fallback reply with ruleDesc still set, giving no signal to the caller that something went wrong",
62
+ "The matched rule's remaining count was already decremented (line 95 in rule-engine.ts) before the resolver was called, so the rule is consumed even on failure",
63
+ "This means a resolver that intermittently throws will consume its match count, return a fallback, and give the test client no indication that the intended reply was not produced"
64
+ ],
65
+ "suggestion": "Consider making resolver errors more visible: either (a) propagate the error as an error reply (e.g., 500 status) so callers can detect it, or (b) at minimum, record the error in the history entry so test assertions can detect resolver failures. The current silent fallback makes debugging flaky test setups difficult.",
66
+ "confidence": "medium"
67
+ },
68
+ {
69
+ "dimension": "error_consistency",
70
+ "identifier": "cli_validators_mixed_sync_async",
71
+ "summary": "parseHost is async while all sibling validators (parsePort, parseLogLevel, parseChunkSize, parseLatency) are synchronous",
72
+ "related_files": [
73
+ "src/cli-validators.ts"
74
+ ],
75
+ "evidence": [
76
+ "parseHost performs DNS lookup via await lookup(value) making it the only async function among 5 sibling validators",
77
+ "The caller in cli.ts must use await for parseHost but not for the others, creating an asymmetric call pattern",
78
+ "The api_surface holistic_context explicitly flags src/cli-validators.ts as having a sync_async_mix"
79
+ ],
80
+ "suggestion": "This is inherent to the DNS lookup requirement, but document it explicitly with a JSDoc comment explaining why parseHost is async while siblings are sync. Alternatively, consider making all validators async for a uniform API, or doing the DNS check at server.start() time rather than at parse time.",
81
+ "confidence": "low"
82
+ },
83
+ {
84
+ "dimension": "ai_generated_debt",
85
+ "identifier": "type_files_high_comment_ratio",
86
+ "summary": "Type definition files have 35-38% comment ratio vs 5% codebase average -- JSDoc comments restate obvious field names",
87
+ "related_files": [
88
+ "src/types/reply.ts",
89
+ "src/types/request.ts",
90
+ "src/types/rule.ts"
91
+ ],
92
+ "evidence": [
93
+ "src/types/reply.ts: '/** Text content to send back. */' on a field named 'text' (line 9)",
94
+ "src/types/reply.ts: '/** Tool calls the model wants to make. */' on a field named 'tools' (line 13)",
95
+ "src/types/request.ts: '/** The last user message text. This is what most matchers check. */' -- first sentence restates the name, second adds genuine value (line 14)",
96
+ "src/types/rule.ts: '/** Human-readable description of what the rule matches. */' on a field named 'description' (line 63)",
97
+ "Codebase average comment ratio is 0.05 (5%), these files are at 0.35-0.38 (35-38%)"
98
+ ],
99
+ "suggestion": "Keep comments that add non-obvious context (like 'This is what most matchers check' or 'Falls back to { input: 10, output: 5 } if omitted') but remove comments that merely restate the type+name (e.g., '/** Text content to send back. */' on a field called text?: string). A good rule: if deleting the comment loses zero information beyond what the name+type already convey, delete it.",
100
+ "confidence": "medium"
101
+ },
102
+ {
103
+ "dimension": "ai_generated_debt",
104
+ "identifier": "logger_high_log_density",
105
+ "summary": "Logger class has a log density of 4.0 -- every method is a formatting wrapper around console with identical structure",
106
+ "related_files": [
107
+ "src/logger.ts"
108
+ ],
109
+ "evidence": [
110
+ "All four methods (error, warn, info, debug) follow an identical pattern: check threshold, destructure style, format with template literal, call console",
111
+ "The only variation between methods is which LEVEL_STYLE entry and which console method is used",
112
+ "This repetitive structure is a hallmark of AI-generated code where each method is written independently rather than factored"
113
+ ],
114
+ "suggestion": "Extract a private _log(level, consoleFn, msg, args) method that contains the shared threshold-check and formatting logic. Each public method becomes a one-liner delegating to _log with the appropriate level key and console method. This reduces the four near-identical 5-line methods to four 1-line methods plus one shared implementation.",
115
+ "confidence": "medium"
116
+ },
117
+ {
118
+ "dimension": "design_coherence",
119
+ "identifier": "openai_serialize_usage_duplication",
120
+ "summary": "Usage object construction is duplicated verbatim between serialize() and serializeComplete() in all three format serializers",
121
+ "related_files": [
122
+ "src/formats/openai/serialize.ts",
123
+ "src/formats/anthropic/serialize.ts",
124
+ "src/formats/responses/serialize.ts"
125
+ ],
126
+ "evidence": [
127
+ "OpenAI serialize.ts lines 41-47 and lines 80-86 construct identical prompt_tokens_details and completion_tokens_details objects",
128
+ "Anthropic serialize.ts lines 55 and 85 both construct { input_tokens: usage.input, output_tokens: usage.output }",
129
+ "Responses serialize.ts lines 98 and 123 both construct { input_tokens: usage.input, output_tokens: usage.output, total_tokens: usage.input + usage.output }",
130
+ "Each format repeats its usage construction logic in both the streaming and non-streaming serialization paths"
131
+ ],
132
+ "suggestion": "Extract format-specific usage builders into small helpers within each serializer file. For example in openai/serialize.ts, add a buildUsage(usage) function that returns the full usage object with details, then call it from both serialize() and serializeComplete(). This eliminates the duplicated object literals.",
133
+ "confidence": "high"
134
+ },
135
+ {
136
+ "dimension": "design_coherence",
137
+ "identifier": "replySequence_duplicated_in_loader_and_server",
138
+ "summary": "Sequence replay logic (index tracking, last-entry fallback, options mutation) is duplicated between MockServer.when().replySequence() and loader.ts addSequenceRule()",
139
+ "related_files": [
140
+ "src/mock-server.ts",
141
+ "src/loader.ts"
142
+ ],
143
+ "evidence": [
144
+ "mock-server.ts lines 107-121: replySequence creates a closure with index++, entries[index] ?? last, and mutates rule.options",
145
+ "loader.ts lines 102-129: addSequenceRule creates a nearly identical closure with index++, resolved[index] ?? lastStep, and mutates rule.options",
146
+ "Both implementations also set rule.remaining = entries.length"
147
+ ],
148
+ "suggestion": "Extract a createSequenceResolver(entries) function into a shared location (e.g., rule-engine.ts or a new sequence-helpers.ts) that returns { resolver, entryCount }. Both mock-server.ts replySequence() and loader.ts addSequenceRule() can call this shared function, eliminating the duplicated closure logic.",
149
+ "confidence": "high"
150
+ },
151
+ {
152
+ "dimension": "low_level_elegance",
153
+ "identifier": "cli_start_function_mixed_concerns",
154
+ "summary": "The cli.ts start() function mixes server setup, banner printing, file watching, and signal handling in a single 80-line function",
155
+ "related_files": [
156
+ "src/cli.ts"
157
+ ],
158
+ "evidence": [
159
+ "Lines 24-104: start() handles option parsing, server construction, rule/handler loading, fallback setup, banner printing, fs.watch setup with debouncing, and SIGINT/SIGTERM handlers",
160
+ "The file watch setup (lines 70-89) is nested inside start() with its own state (reloading flag, setTimeout)",
161
+ "Signal handlers (lines 91-103) are also wired inline"
162
+ ],
163
+ "suggestion": "Extract the file-watch setup into a watchRulesPath(server, rulesPath, fallback, logger) function, and the signal handling into a setupShutdown(server, logger) function. The start() function should read as a linear composition of setup steps rather than containing all the implementation details inline.",
164
+ "confidence": "medium"
165
+ },
166
+ {
167
+ "dimension": "low_level_elegance",
168
+ "identifier": "responses_serialize_deeply_nested_object_literals",
169
+ "summary": "responses/serialize.ts builds deeply nested inline object literals making the streaming event structure hard to read",
170
+ "related_files": [
171
+ "src/formats/responses/serialize.ts"
172
+ ],
173
+ "evidence": [
174
+ "Lines 42-43: textStreamBlock constructs a 3-level-deep inline object: { type: 'response.output_item.added', output_index: i, item: { type: 'message', id: itemId, status: 'in_progress', role: 'assistant', content: [] } }",
175
+ "Line 62: toolStreamBlock spreads an outputItem into a modified copy inline: { ...outputItem, status: 'in_progress', arguments: '' }",
176
+ "These long inline constructions make it difficult to scan the event structure at a glance"
177
+ ],
178
+ "suggestion": "Extract the repeated item shapes into named local variables or small builder functions. For example, const inProgressItem = { ...outputItem, status: 'in_progress', arguments: '' } on a separate line before passing it to c(). This improves scannability without adding abstraction overhead.",
179
+ "confidence": "low"
180
+ },
181
+ {
182
+ "dimension": "api_surface_coherence",
183
+ "identifier": "format_serialize_return_type_unknown",
184
+ "summary": "serializeComplete and serializeError return 'unknown' in the Format interface, losing type information at the boundary",
185
+ "related_files": [
186
+ "src/formats/types.ts",
187
+ "src/formats/openai/serialize.ts",
188
+ "src/formats/anthropic/serialize.ts",
189
+ "src/formats/responses/serialize.ts"
190
+ ],
191
+ "evidence": [
192
+ "Format interface line 15: serializeComplete returns 'unknown'",
193
+ "Format interface line 16: serializeError returns 'unknown'",
194
+ "Each concrete implementation also returns 'unknown' rather than a typed object",
195
+ "Callers in route-handler.ts pass the result directly to reply.send() without any type checking"
196
+ ],
197
+ "suggestion": "Since these are JSON response bodies sent to the client, at minimum type them as Record<string, unknown> or JsonValue. Ideally, use a generic Format<TComplete, TError> or define a union type covering the three format response shapes. This prevents accidental primitive returns and improves IDE support for callers.",
198
+ "confidence": "medium"
199
+ },
200
+ {
201
+ "dimension": "mid_level_elegance",
202
+ "identifier": "route_handler_history_records_before_streaming",
203
+ "summary": "History records the request before the streaming response is written, so history.count() may increment before the client has received any data",
204
+ "related_files": [
205
+ "src/route-handler.ts"
206
+ ],
207
+ "evidence": [
208
+ "Line 89: history.record(mockReq, ruleDesc) is called unconditionally before the streaming/non-streaming branch",
209
+ "Line 85: For error replies, history.record is called before returning the error response",
210
+ "If writeSSE (line 111) fails mid-stream, the history already recorded the request as handled"
211
+ ],
212
+ "suggestion": "This is a minor timing issue but worth acknowledging. Consider recording the history entry after the response is fully written (after writeSSE completes or after reply.send() returns). Alternatively, add a 'status' field to RecordedRequest indicating whether the response was successfully delivered.",
213
+ "confidence": "low"
214
+ },
215
+ {
216
+ "dimension": "convention_outlier",
217
+ "identifier": "parse_helpers_mixed_responsibility",
218
+ "summary": "parse-helpers.ts mixes request-building utilities (buildMockRequest) with serialization utilities (splitText, genId, toolId, finishReason, shouldEmitText)",
219
+ "related_files": [
220
+ "src/formats/parse-helpers.ts"
221
+ ],
222
+ "evidence": [
223
+ "buildMockRequest (line 53) is used only by parse.ts files for constructing MockRequest objects from incoming requests",
224
+ "splitText, genId, toolId, shouldEmitText, finishReason are used only by serialize.ts files for constructing outgoing responses",
225
+ "isStreaming (line 37) is used during parsing (index.ts) and represents a third concern (request detection)",
226
+ "The file has 11 importers across very different use contexts"
227
+ ],
228
+ "suggestion": "Split parse-helpers.ts into two files: serialize-helpers.ts (for splitText, genId, toolId, shouldEmitText, finishReason, DEFAULT_USAGE, MS_PER_SECOND) and request-helpers.ts (for buildMockRequest, isStreaming, RequestMeta). This aligns each file with a single direction of data flow.",
229
+ "confidence": "medium"
230
+ }
231
+ ],
232
+ "provenance": {
233
+ "kind": "blind_review_batch_import",
234
+ "blind": true,
235
+ "runner": "claude",
236
+ "run_stamp": "ext_20260315_000339_a6cdc3e6",
237
+ "created_at": "2026-03-15T00:08:01+00:00",
238
+ "packet_path": "/Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/review_packet_blind.json",
239
+ "packet_sha256": "61563983650626757ab21c4b1e8ea4db0d723c4ce1fde659f2bbff0a59e56044",
240
+ "external_session_id": "ext_20260315_000339_a6cdc3e6"
241
+ }
242
+ }