llm-mock-server 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/canonical_import_20260315_050000.json +286 -0
  2. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/canonical_import_20260315_050028.json +303 -0
  3. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/claude_launch_prompt.md +17 -0
  4. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/review_result.json +297 -0
  5. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/review_result.template.json +22 -0
  6. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/reviewer_instructions.md +20 -0
  7. package/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/session.json +20 -0
  8. package/.desloppify/query.json +31 -103
  9. package/.desloppify/review_packet_blind.json +134 -188
  10. package/.desloppify/review_packets/holistic_packet_20260315_045546.json +1480 -0
  11. package/.desloppify/state-typescript.json +2285 -846
  12. package/.desloppify/state-typescript.json.bak +2252 -840
  13. package/.editorconfig +12 -0
  14. package/.github/workflows/test.yml +3 -0
  15. package/.oxfmtrc.json +9 -0
  16. package/README.md +5 -0
  17. package/package.json +5 -2
  18. package/scorecard.png +0 -0
  19. package/src/cli-validators.ts +12 -4
  20. package/src/cli.ts +25 -11
  21. package/src/formats/anthropic/parse.ts +24 -5
  22. package/src/formats/anthropic/schema.ts +16 -8
  23. package/src/formats/anthropic/serialize.ts +112 -28
  24. package/src/formats/openai/parse.ts +12 -2
  25. package/src/formats/openai/schema.ts +43 -30
  26. package/src/formats/openai/serialize.ts +73 -17
  27. package/src/formats/request-helpers.ts +2 -1
  28. package/src/formats/responses/parse.ts +17 -3
  29. package/src/formats/responses/schema.ts +34 -20
  30. package/src/formats/responses/serialize.ts +235 -40
  31. package/src/formats/serialize-helpers.ts +10 -2
  32. package/src/formats/types.ts +16 -3
  33. package/src/index.ts +3 -1
  34. package/src/loader.ts +48 -12
  35. package/src/logger.ts +25 -7
  36. package/src/mock-server.ts +28 -7
  37. package/src/route-handler.ts +49 -14
  38. package/src/rule-engine.ts +43 -12
  39. package/src/types/reply.ts +6 -2
  40. package/src/types.ts +24 -3
  41. package/test/cli-validators.test.ts +16 -4
  42. package/test/formats/anthropic.test.ts +95 -19
  43. package/test/formats/openai.test.ts +85 -24
  44. package/test/formats/parse-helpers.test.ts +47 -7
  45. package/test/formats/responses.test.ts +111 -30
  46. package/test/history.test.ts +18 -5
  47. package/test/loader.test.ts +52 -17
  48. package/test/logger.test.ts +59 -9
  49. package/test/mock-server.test.ts +76 -22
  50. package/test/rule-engine.test.ts +49 -19
  51. /package/{ARCHITECTURE.md → docs/ARCHITECTURE.md} +0 -0
@@ -0,0 +1,286 @@
1
+ {
2
+ "assessments": {
3
+ "cross_module_architecture": 93.0,
4
+ "convention_outlier": 90.0,
5
+ "error_consistency": 82.0,
6
+ "abstraction_fitness": 91.0,
7
+ "api_surface_coherence": 85.0,
8
+ "authorization_consistency": 100.0,
9
+ "ai_generated_debt": 88.0,
10
+ "incomplete_migration": 95.0,
11
+ "package_organization": 94.0,
12
+ "high_level_elegance": 92.0,
13
+ "mid_level_elegance": 88.0,
14
+ "low_level_elegance": 86.0,
15
+ "design_coherence": 87.0
16
+ },
17
+ "findings": [
18
+ {
19
+ "dimension": "error_consistency",
20
+ "identifier": "resolver_error_swallowed_silently",
21
+ "summary": "Resolver errors are logged then silently replaced with fallback, losing error context for callers",
22
+ "related_files": [
23
+ "src/route-handler.ts"
24
+ ],
25
+ "evidence": [
26
+ "In resolveReply() (lines 36-46), when matched.resolve throws, the error is caught, logged, and the fallback reply is returned. The caller has no way to distinguish a successful fallback from a resolver failure. History records the rule description but not the error state, making debugging difficult in tests."
27
+ ],
28
+ "suggestion": "Record the error state in the history entry (e.g. add an `error` field to RecordedRequest), or at minimum set a distinct ruleDesc like `${matched.description} (error)` so that test assertions can detect resolver failures vs. normal fallback usage.",
29
+ "confidence": "medium"
30
+ },
31
+ {
32
+ "dimension": "error_consistency",
33
+ "identifier": "history_recorded_before_streaming_completes",
34
+ "summary": "Error reply records history before send, but streaming path also records before writeSSE may fail",
35
+ "related_files": [
36
+ "src/route-handler.ts"
37
+ ],
38
+ "evidence": [
39
+ "Line 109 and 116: history.record() is called before the response is fully sent. For error replies (line 109) this is fine, but for the streaming path (line 116), if writeSSE throws (e.g. client disconnect), the request is recorded as successfully handled. The recording point is inconsistent between error and success paths -- error records at line 109 then returns, normal records at 116 then may stream or return JSON."
40
+ ],
41
+ "suggestion": "Move history.record() after the response is fully sent (after writeSSE completes for streaming, after reply.send for JSON), or add a status/success field to the recorded entry. This ensures history reflects actual outcome.",
42
+ "confidence": "medium"
43
+ },
44
+ {
45
+ "dimension": "error_consistency",
46
+ "identifier": "loader_silent_skip_on_unknown_extension",
47
+ "summary": "loadRulesFromPath silently skips files with unrecognized extensions instead of warning",
48
+ "related_files": [
49
+ "src/loader.ts"
50
+ ],
51
+ "evidence": [
52
+ "Lines 222-225: When a file path is given with an extension not in loaderByExtension (e.g. '.yaml', '.txt'), the function silently returns without loading anything or signaling an issue. The directory-loading path (line 234) also calls loadRulesFromPath recursively, so stray files in a rules directory are silently ignored."
53
+ ],
54
+ "suggestion": "When loading a single file (info.isFile()) with an unsupported extension, either throw an error ('Unsupported file extension...') or accept a Logger parameter and log a warning. Silent skipping is surprising when a user explicitly passes a file path.",
55
+ "confidence": "high"
56
+ },
57
+ {
58
+ "dimension": "convention_outlier",
59
+ "identifier": "buildUsage_duplicated_across_serializers",
60
+ "summary": "buildUsage helper is independently defined in all three format serializers with slightly different shapes",
61
+ "related_files": [
62
+ "src/formats/openai/serialize.ts",
63
+ "src/formats/anthropic/serialize.ts",
64
+ "src/formats/responses/serialize.ts"
65
+ ],
66
+ "evidence": [
67
+ "OpenAI buildUsage (line 12) returns {prompt_tokens, completion_tokens, total_tokens, ..._details}. Anthropic buildUsage (line 12) returns {input_tokens, output_tokens}. Responses buildUsage (line 12) returns {input_tokens, output_tokens, total_tokens}. Each file defines its own private buildUsage function with the same name, same input signature ({input: number, output: number}), but different output shapes. This is a sibling behavioral inconsistency -- three siblings all do the same conceptual transformation but aren't coordinated."
68
+ ],
69
+ "suggestion": "This is intentional variation (different API formats require different shapes), so no structural change is needed, but consider renaming the functions to be more specific (e.g. buildOpenAIUsage, buildAnthropicUsage) or adding a brief comment noting the format-specific shape is deliberate. The identical naming across three files creates a false sense of fungibility.",
70
+ "confidence": "low"
71
+ },
72
+ {
73
+ "dimension": "api_surface_coherence",
74
+ "identifier": "cli_validators_mixed_sync_async",
75
+ "summary": "parseHost is async while all sibling validators (parsePort, parseLogLevel, etc.) are sync",
76
+ "related_files": [
77
+ "src/cli-validators.ts"
78
+ ],
79
+ "evidence": [
80
+ "parsePort (line 13), parseLogLevel (line 21), parseChunkSize (line 49), parseLatency (line 59) are all synchronous functions. parseHost (line 30) is async because it calls dns.lookup. This forces the caller (cli.ts line 38) to await parseHost while the others are called synchronously. The mixed sync/async surface in a cohesive set of validation functions is surprising."
81
+ ],
82
+ "suggestion": "Consider making parseHost synchronous by using isIP() for IP addresses and a regex for valid hostname format, deferring actual resolution to the server's listen() call. If DNS validation is essential, document in parseHost's JSDoc that it is async unlike its siblings, and consider grouping it separately.",
83
+ "confidence": "medium"
84
+ },
85
+ {
86
+ "dimension": "api_surface_coherence",
87
+ "identifier": "isStreaming_default_true_surprising",
88
+ "summary": "isStreaming defaults to true for any non-object input including null/undefined, which is a surprising API contract",
89
+ "related_files": [
90
+ "src/formats/request-helpers.ts"
91
+ ],
92
+ "evidence": [
93
+ "isStreaming (line 9-11) returns `asRecord(body)['stream'] !== false`, meaning any body that isn't an object with `stream: false` is treated as streaming. Passing null, undefined, a string, or a number all return true. This is an implicit opt-out contract rather than opt-in, which can silently produce streaming responses when the caller expected JSON."
94
+ ],
95
+ "suggestion": "Consider making streaming explicit: return true only when stream is explicitly true or when it's a valid object without a stream field. At minimum, add a JSDoc comment explaining the opt-out default. The current behavior is consistent with some LLM API defaults but could be surprising to mock server users.",
96
+ "confidence": "low"
97
+ },
98
+ {
99
+ "dimension": "low_level_elegance",
100
+ "identifier": "genId_collision_risk",
101
+ "summary": "genId uses Date.now() in base36, creating collision risk for IDs generated in the same millisecond",
102
+ "related_files": [
103
+ "src/formats/serialize-helpers.ts"
104
+ ],
105
+ "evidence": [
106
+ "genId (line 16-18) generates IDs as `${prefix}_${Date.now().toString(36)}`. Two calls within the same millisecond produce identical IDs. This affects toolId (line 20-26) as well -- multiple tools in the same reply will get IDs that differ only by the index suffix, but if toolId is called without an index (or with 0 for multiple tools), IDs collide. In serializeComplete for Anthropic (line 152), all tools use index 0: `toolId(tool, 'toolu', 0)` causing collisions when multiple tools are present."
107
+ ],
108
+ "suggestion": "Add a monotonic counter or random suffix to genId: `${prefix}_${Date.now().toString(36)}_${(counter++).toString(36)}`. Alternatively, use crypto.randomUUID() or nanoid (already a transitive dependency). Also fix the Anthropic serializeComplete to pass the actual tool index instead of hardcoded 0.",
109
+ "confidence": "high"
110
+ },
111
+ {
112
+ "dimension": "low_level_elegance",
113
+ "identifier": "anthropic_serializeComplete_tool_index_hardcoded",
114
+ "summary": "Anthropic serializeComplete passes hardcoded index 0 for all tool IDs, causing ID collisions",
115
+ "related_files": [
116
+ "src/formats/anthropic/serialize.ts"
117
+ ],
118
+ "evidence": [
119
+ "Line 152: `(reply.tools ?? []).map((tool) => ({ ... id: toolId(tool, 'toolu', 0), ... }))` -- every tool in the array gets index 0 passed to toolId. When multiple tools are present and none have explicit IDs, they all get the same generated ID (same prefix, same timestamp, same index). The streaming path (toolBlocks, line 65) correctly uses `startIndex + i` for each tool."
120
+ ],
121
+ "suggestion": "Change line 152 to use the map index: `(reply.tools ?? []).map((tool, i) => ({ ... id: toolId(tool, 'toolu', i), ... }))`",
122
+ "confidence": "high"
123
+ },
124
+ {
125
+ "dimension": "low_level_elegance",
126
+ "identifier": "responses_serializeComplete_tool_index_hardcoded",
127
+ "summary": "Responses serializeComplete also uses hardcoded index 0 for all tool call IDs",
128
+ "related_files": [
129
+ "src/formats/responses/serialize.ts"
130
+ ],
131
+ "evidence": [
132
+ "Lines 291-300: `(reply.tools ?? []).map((tool) => { const callId = toolId(tool, 'call', 0); ... })` -- same issue as Anthropic. When multiple tools lack explicit IDs, they all get the same generated ID. The streaming path (toolStreamBlock) correctly uses the incrementing `i` variable."
133
+ ],
134
+ "suggestion": "Change to use the map index: `(reply.tools ?? []).map((tool, i) => { const callId = toolId(tool, 'call', i); ... })`",
135
+ "confidence": "high"
136
+ },
137
+ {
138
+ "dimension": "mid_level_elegance",
139
+ "identifier": "sequence_resolver_mutates_rule_options",
140
+ "summary": "createSequenceResolver mutates rule.options on each call, creating a side-channel between the resolver and the route handler",
141
+ "related_files": [
142
+ "src/rule-engine.ts",
143
+ "src/route-handler.ts"
144
+ ],
145
+ "evidence": [
146
+ "In createSequenceResolver (lines 91-107), the returned resolver function mutates `rule.options` on each invocation (line 102: `rule.options = step.options ?? {}`). In route-handler.ts line 119, `effectiveOptions` is computed as `{ ...defaultOptions, ...matched?.options }` AFTER resolveReply has already run. This works because resolveReply calls matched.resolve which mutates matched.options before effectiveOptions is read. But this temporal coupling is fragile -- the reader must understand that resolveReply has a side effect on matched.options that the subsequent line depends on."
147
+ ],
148
+ "suggestion": "Return the per-step options from the resolver instead of mutating the rule object. For example, have the resolver return `{ reply, options }` and let resolveReply propagate both values. This makes the data flow explicit rather than relying on mutation timing.",
149
+ "confidence": "medium"
150
+ },
151
+ {
152
+ "dimension": "design_coherence",
153
+ "identifier": "mock_server_when_builds_rule_handle_inline",
154
+ "summary": "MockServer.when() constructs PendingRule and RuleHandle inline with closure-captured state, mixing builder logic into the server",
155
+ "related_files": [
156
+ "src/mock-server.ts"
157
+ ],
158
+ "evidence": [
159
+ "Lines 103-133: The when() method constructs a PendingRule object with reply() and replySequence() methods inline. The makeHandle closure creates RuleHandle objects. The replySequence method (lines 121-132) duplicates logic from the loader's addSequenceRule (loader.ts lines 108-131) -- both normalize sequence entries, call engine.add, then call createSequenceResolver and patch the rule. This is two implementations of the same concept."
160
+ ],
161
+ "suggestion": "Extract the sequence-rule creation into a shared function (e.g. in rule-engine.ts: `addSequenceRule(engine, match, steps)`) and call it from both MockServer.when().replySequence() and loader.ts addSequenceRule(). This eliminates the duplicated normalization and rule-patching logic.",
162
+ "confidence": "medium"
163
+ },
164
+ {
165
+ "dimension": "design_coherence",
166
+ "identifier": "route_handler_function_does_too_much",
167
+ "summary": "createRouteHandler's returned handler function performs parsing, matching, resolving, error handling, streaming decision, and response sending",
168
+ "related_files": [
169
+ "src/route-handler.ts"
170
+ ],
171
+ "evidence": [
172
+ "The handler function (lines 63-147) is 85 lines and handles: (1) header extraction, (2) request parsing with Zod validation, (3) rule matching, (4) reply resolution, (5) error reply handling, (6) history recording, (7) streaming vs. non-streaming decision, (8) logging, (9) SSE writing. While each step is relatively straightforward, the single function accumulates all orchestration responsibilities."
173
+ ],
174
+ "suggestion": "The function is cohesive enough that splitting it could be worse than the status quo. However, consider extracting the header-extraction and request-parsing into a helper (e.g. `parseIncomingRequest(format, request)`) to reduce the cognitive load of the main handler. The current structure is functional but approaches the point where a new engineer would need to read the entire function to understand any part of it.",
175
+ "confidence": "low"
176
+ },
177
+ {
178
+ "dimension": "ai_generated_debt",
179
+ "identifier": "type_files_high_comment_ratio",
180
+ "summary": "Type definition files have disproportionately high comment ratios (31-32%) relative to codebase average (4.2%)",
181
+ "related_files": [
182
+ "src/types/request.ts",
183
+ "src/types/rule.ts"
184
+ ],
185
+ "evidence": [
186
+ "src/types/request.ts has 32% comment ratio vs 4.2% codebase average. src/types/rule.ts has 31% comment ratio. Many comments restate what the type signature already communicates. For example, in request.ts: `readonly lastMessage: string` has comment `/** The last user message's text. This is what most matchers check. */` -- the second sentence adds value but the first sentence restates the name. In rule.ts: `/** Returned by `when()`. Call `.reply()` or `.replySequence()` on it to complete the rule. */` on PendingRule -- the interface definition below makes this obvious."
187
+ ],
188
+ "suggestion": "Trim JSDoc comments on types to only include non-obvious information. Keep comments that explain 'why' or usage guidance (like 'This is what most matchers check'), but remove comments that restate the type name or signature. For example, `readonly tools?: readonly ToolDef[] | undefined;` needs no doc comment; the name and type are self-documenting.",
189
+ "confidence": "medium"
190
+ },
191
+ {
192
+ "dimension": "incomplete_migration",
193
+ "identifier": "single_require_in_esm_codebase",
194
+ "summary": "cli.ts uses createRequire to read package.json version, the only require() in an otherwise pure ESM codebase",
195
+ "related_files": [
196
+ "src/cli.ts"
197
+ ],
198
+ "evidence": [
199
+ "Line 4: `import { createRequire } from 'node:module'` and line 17: `const require = createRequire(import.meta.url); const { version } = require('../package.json') as { version: string };`. The entire codebase is ESM (type: module in package.json, all other imports use ESM syntax). This is the sole use of require()."
200
+ ],
201
+ "suggestion": "Use an import assertion/attribute to load the package.json: `import pkg from '../package.json' with { type: 'json' };` (Node 22+ supports this). This eliminates the require shim and makes the codebase fully ESM.",
202
+ "confidence": "medium"
203
+ },
204
+ {
205
+ "dimension": "cross_module_architecture",
206
+ "identifier": "dynamic_import_of_loader_in_server",
207
+ "summary": "MockServer.load() uses a dynamic import for loader.js, creating a hidden runtime dependency that static analysis cannot trace",
208
+ "related_files": [
209
+ "src/mock-server.ts",
210
+ "src/loader.ts"
211
+ ],
212
+ "evidence": [
213
+ "Line 187: `const { loadRulesFromPath } = await import('./loader.js');`. This dynamic import means loader.ts is not in the static dependency graph of mock-server.ts. While this may be intentional for tree-shaking (users who never call load() don't pay for the loader code), it creates an invisible architecture edge that tools and developers cannot see without reading the implementation."
214
+ ],
215
+ "suggestion": "Add a comment explaining the intent (tree-shaking/code-splitting). If tree-shaking is not a goal, convert to a static import. If it is intentional, document this in the module's JSDoc: `/** Dynamically imports loader.js to keep it out of the bundle for users who don't use file-based rules. */`",
216
+ "confidence": "medium"
217
+ },
218
+ {
219
+ "dimension": "mid_level_elegance",
220
+ "identifier": "cli_watch_debounce_races",
221
+ "summary": "CLI watch mode uses a boolean flag for debouncing that can miss rapid successive changes",
222
+ "related_files": [
223
+ "src/cli.ts"
224
+ ],
225
+ "evidence": [
226
+ "Lines 88-103: The watch handler uses a `reloading` boolean flag and setTimeout with WATCH_DEBOUNCE_MS=100. If a change fires while reloading is true (during the 100ms timeout or during the async reload), it is silently dropped. A rapid sequence of saves could result in the first change being loaded and the last (most current) being missed entirely, leaving stale rules loaded."
227
+ ],
228
+ "suggestion": "Use a proper debounce pattern that resets the timer on each new event, ensuring the final change is always processed: store the timeout ID and clear/reset it on each watch callback. Example: `let timer: NodeJS.Timeout | undefined; watch(..., () => { clearTimeout(timer); timer = setTimeout(async () => { ... }, WATCH_DEBOUNCE_MS); });`",
229
+ "confidence": "high"
230
+ },
231
+ {
232
+ "dimension": "high_level_elegance",
233
+ "identifier": "format_interface_serializeError_status_unused",
234
+ "summary": "Format.serializeError receives status but all three implementations ignore it in the response body",
235
+ "related_files": [
236
+ "src/formats/types.ts",
237
+ "src/formats/openai/serialize.ts",
238
+ "src/formats/anthropic/serialize.ts",
239
+ "src/formats/responses/serialize.ts"
240
+ ],
241
+ "evidence": [
242
+ "The Format interface (types.ts line 25) defines serializeError with `status: number` in the parameter. But all three implementations ignore the status field when constructing the error body -- OpenAI (serialize.ts line 134-146) only uses message and type, Anthropic (serialize.ts line 170-179) only uses message and type, Responses (serialize.ts line 315-328) only uses message and type. The status is used by route-handler.ts (line 113) to set the HTTP status code, not by the serializer. The parameter creates a false expectation that the serializer should use it."
243
+ ],
244
+ "suggestion": "Remove `status` from the serializeError parameter signature in the Format interface and all implementations, since it is only used by the route handler for the HTTP status code, not by the serialization logic. This makes the boundary clearer.",
245
+ "confidence": "medium"
246
+ },
247
+ {
248
+ "dimension": "abstraction_fitness",
249
+ "identifier": "buildMockRequest_seven_params",
250
+ "summary": "buildMockRequest takes 7 positional parameters, making call sites hard to read",
251
+ "related_files": [
252
+ "src/formats/request-helpers.ts"
253
+ ],
254
+ "evidence": [
255
+ "buildMockRequest (line 25-51) accepts: format, body, messages, tools, defaultModel, raw, meta. All three callers (openai/parse.ts:35, anthropic/parse.ts:58, responses/parse.ts:61) pass 7 positional arguments. The call sites read like `buildMockRequest('openai', req, parseMessages(req), parseTools(req), 'gpt-5.4', body, meta)` which requires counting positions to understand what each argument is."
256
+ ],
257
+ "suggestion": "Group the parameters into an options object: `buildMockRequest({ format: 'openai', body: req, messages: parseMessages(req), tools: parseTools(req), defaultModel: 'gpt-5.4', raw: body, meta })`. This is a common pattern for functions with more than 4-5 parameters and makes call sites self-documenting.",
258
+ "confidence": "medium"
259
+ }
260
+ ],
261
+ "dimension_notes": {
262
+ "authorization_consistency": "This is a mock server intended for local testing. Routes are intentionally unauthenticated by design -- the server's purpose is to simulate LLM API responses in test environments. No auth gaps exist because auth is not part of the domain.",
263
+ "cross_module_architecture": "The codebase has clean dependency direction: types flow outward, formats depend on shared helpers, mock-server orchestrates. The one notable edge is the dynamic import of loader.js, which is reported as a finding. Otherwise, boundaries are well-defined.",
264
+ "convention_outlier": "The three format modules (openai, anthropic, responses) follow an identical file structure (index.ts, parse.ts, schema.ts, serialize.ts) which is excellent consistency. Minor outlier is the duplicated buildUsage function naming across serializers.",
265
+ "incomplete_migration": "Nearly fully ESM. The single createRequire usage in cli.ts is the only CJS residue.",
266
+ "package_organization": "For a project of this size (~40 files), the directory layout is well-organized with clear domain boundaries. The formats/ subdirectory with per-provider modules is clean.",
267
+ "high_level_elegance": "The decomposition is clear: types define contracts, formats handle protocol translation, rule-engine handles matching, mock-server orchestrates. The Format interface provides a clean plugin boundary. Minor concern about serializeError parameter mismatch is reported.",
268
+ "abstraction_fitness": "Abstractions are generally well-fitted. The Format interface earns its keep across three implementations. The buildMockRequest parameter count is the main concern.",
269
+ "api_surface_coherence": "The public API (MockServer class) is coherent. The mixed sync/async in cli-validators is the notable inconsistency.",
270
+ "mid_level_elegance": "Handoffs between modules are mostly clean. The sequence resolver mutation and the CLI watch debounce pattern are the notable rough edges.",
271
+ "low_level_elegance": "Implementation craft is generally good. The ID generation collision risk and the hardcoded tool index in serializeComplete are the main issues.",
272
+ "design_coherence": "Functions are mostly focused. The duplicated sequence-rule logic between MockServer.when() and loader.ts is the clearest design coherence issue.",
273
+ "ai_generated_debt": "The codebase is generally clean of AI-generated patterns. The type files have somewhat elevated comment ratios with some restating comments, but the comments do generally add value.",
274
+ "error_consistency": "Error handling is functional but inconsistent in a few places: resolver errors are silently swallowed with fallback, unknown file extensions are silently skipped, and history recording timing is inconsistent between error and success paths."
275
+ },
276
+ "provenance": {
277
+ "kind": "blind_review_batch_import",
278
+ "blind": true,
279
+ "runner": "claude",
280
+ "run_stamp": "ext_20260315_045546_0587ea3b",
281
+ "created_at": "2026-03-15T05:00:00+00:00",
282
+ "packet_path": "/Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/review_packet_blind.json",
283
+ "packet_sha256": "1405a7ac30145db0e952bfb0b7abd92e594e863c7dd65c60dadbfe28680cf423",
284
+ "external_session_id": "ext_20260315_045546_0587ea3b"
285
+ }
286
+ }
@@ -0,0 +1,303 @@
1
+ {
2
+ "assessments": {
3
+ "cross_module_architecture": 93.0,
4
+ "convention_outlier": 90.0,
5
+ "error_consistency": 82.0,
6
+ "abstraction_fitness": 91.0,
7
+ "api_surface_coherence": 85.0,
8
+ "authorization_consistency": 100.0,
9
+ "ai_generated_debt": 88.0,
10
+ "incomplete_migration": 95.0,
11
+ "package_organization": 94.0,
12
+ "high_level_elegance": 92.0,
13
+ "mid_level_elegance": 88.0,
14
+ "low_level_elegance": 86.0,
15
+ "design_coherence": 87.0
16
+ },
17
+ "findings": [
18
+ {
19
+ "dimension": "error_consistency",
20
+ "identifier": "resolver_error_swallowed_silently",
21
+ "summary": "Resolver errors are logged then silently replaced with fallback, losing error context for callers",
22
+ "related_files": [
23
+ "src/route-handler.ts"
24
+ ],
25
+ "evidence": [
26
+ "In resolveReply() (lines 36-46), when matched.resolve throws, the error is caught, logged, and the fallback reply is returned. The caller has no way to distinguish a successful fallback from a resolver failure. History records the rule description but not the error state, making debugging difficult in tests."
27
+ ],
28
+ "suggestion": "Record the error state in the history entry (e.g. add an `error` field to RecordedRequest), or at minimum set a distinct ruleDesc like `${matched.description} (error)` so that test assertions can detect resolver failures vs. normal fallback usage.",
29
+ "confidence": "medium"
30
+ },
31
+ {
32
+ "dimension": "error_consistency",
33
+ "identifier": "history_recorded_before_streaming_completes",
34
+ "summary": "Error reply records history before send, but streaming path also records before writeSSE may fail",
35
+ "related_files": [
36
+ "src/route-handler.ts"
37
+ ],
38
+ "evidence": [
39
+ "Line 109 and 116: history.record() is called before the response is fully sent. For error replies (line 109) this is fine, but for the streaming path (line 116), if writeSSE throws (e.g. client disconnect), the request is recorded as successfully handled. The recording point is inconsistent between error and success paths -- error records at line 109 then returns, normal records at 116 then may stream or return JSON."
40
+ ],
41
+ "suggestion": "Move history.record() after the response is fully sent (after writeSSE completes for streaming, after reply.send for JSON), or add a status/success field to the recorded entry. This ensures history reflects actual outcome.",
42
+ "confidence": "medium"
43
+ },
44
+ {
45
+ "dimension": "error_consistency",
46
+ "identifier": "loader_silent_skip_on_unknown_extension",
47
+ "summary": "loadRulesFromPath silently skips files with unrecognized extensions instead of warning",
48
+ "related_files": [
49
+ "src/loader.ts"
50
+ ],
51
+ "evidence": [
52
+ "Lines 222-225: When a file path is given with an extension not in loaderByExtension (e.g. '.yaml', '.txt'), the function silently returns without loading anything or signaling an issue. The directory-loading path (line 234) also calls loadRulesFromPath recursively, so stray files in a rules directory are silently ignored."
53
+ ],
54
+ "suggestion": "When loading a single file (info.isFile()) with an unsupported extension, either throw an error ('Unsupported file extension...') or accept a Logger parameter and log a warning. Silent skipping is surprising when a user explicitly passes a file path.",
55
+ "confidence": "high"
56
+ },
57
+ {
58
+ "dimension": "convention_outlier",
59
+ "identifier": "buildUsage_duplicated_across_serializers",
60
+ "summary": "buildUsage helper is independently defined in all three format serializers with slightly different shapes",
61
+ "related_files": [
62
+ "src/formats/openai/serialize.ts",
63
+ "src/formats/anthropic/serialize.ts",
64
+ "src/formats/responses/serialize.ts"
65
+ ],
66
+ "evidence": [
67
+ "OpenAI buildUsage (line 12) returns {prompt_tokens, completion_tokens, total_tokens, ..._details}. Anthropic buildUsage (line 12) returns {input_tokens, output_tokens}. Responses buildUsage (line 12) returns {input_tokens, output_tokens, total_tokens}. Each file defines its own private buildUsage function with the same name, same input signature ({input: number, output: number}), but different output shapes. This is a sibling behavioral inconsistency -- three siblings all do the same conceptual transformation but aren't coordinated."
68
+ ],
69
+ "suggestion": "This is intentional variation (different API formats require different shapes), so no structural change is needed, but consider renaming the functions to be more specific (e.g. buildOpenAIUsage, buildAnthropicUsage) or adding a brief comment noting the format-specific shape is deliberate. The identical naming across three files creates a false sense of fungibility.",
70
+ "confidence": "low"
71
+ },
72
+ {
73
+ "dimension": "api_surface_coherence",
74
+ "identifier": "cli_validators_mixed_sync_async",
75
+ "summary": "parseHost is async while all sibling validators (parsePort, parseLogLevel, etc.) are sync",
76
+ "related_files": [
77
+ "src/cli-validators.ts"
78
+ ],
79
+ "evidence": [
80
+ "parsePort (line 13), parseLogLevel (line 21), parseChunkSize (line 49), parseLatency (line 59) are all synchronous functions. parseHost (line 30) is async because it calls dns.lookup. This forces the caller (cli.ts line 38) to await parseHost while the others are called synchronously. The mixed sync/async surface in a cohesive set of validation functions is surprising."
81
+ ],
82
+ "suggestion": "Consider making parseHost synchronous by using isIP() for IP addresses and a regex for valid hostname format, deferring actual resolution to the server's listen() call. If DNS validation is essential, document in parseHost's JSDoc that it is async unlike its siblings, and consider grouping it separately.",
83
+ "confidence": "medium"
84
+ },
85
+ {
86
+ "dimension": "api_surface_coherence",
87
+ "identifier": "isStreaming_default_true_surprising",
88
+ "summary": "isStreaming defaults to true for any non-object input including null/undefined, which is a surprising API contract",
89
+ "related_files": [
90
+ "src/formats/request-helpers.ts"
91
+ ],
92
+ "evidence": [
93
+ "isStreaming (lines 9-11) returns `asRecord(body)['stream'] !== false`, meaning any body that isn't an object with `stream: false` is treated as streaming. Passing null, undefined, a string, or a number all return true. This is an implicit opt-out contract rather than opt-in, which can silently produce streaming responses when the caller expected JSON."
94
+ ],
95
+ "suggestion": "Consider making streaming explicit: return true only when stream is explicitly true or when it's a valid object without a stream field. At minimum, add a JSDoc comment explaining the opt-out default. The current behavior is consistent with some LLM API defaults but could be surprising to mock server users.",
96
+ "confidence": "low"
97
+ },
98
+ {
99
+ "dimension": "low_level_elegance",
100
+ "identifier": "genId_collision_risk",
101
+ "summary": "genId uses Date.now() in base36, creating collision risk for IDs generated in the same millisecond",
102
+ "related_files": [
103
+ "src/formats/serialize-helpers.ts"
104
+ ],
105
+ "evidence": [
106
+ "genId (lines 16-18) generates IDs as `${prefix}_${Date.now().toString(36)}`. Two calls within the same millisecond produce identical IDs. This affects toolId (lines 20-26) as well -- multiple tools in the same reply will get IDs that differ only by the index suffix, but if toolId is called without an index (or with 0 for multiple tools), IDs collide. In serializeComplete for Anthropic (line 152), all tools use index 0: `toolId(tool, 'toolu', 0)` causing collisions when multiple tools are present."
107
+ ],
108
+ "suggestion": "Add a monotonic counter or random suffix to genId: `${prefix}_${Date.now().toString(36)}_${(counter++).toString(36)}`. Alternatively, use crypto.randomUUID() or nanoid (already a transitive dependency). Also fix the Anthropic serializeComplete to pass the actual tool index instead of hardcoded 0.",
109
+ "confidence": "high"
110
+ },
111
+ {
112
+ "dimension": "low_level_elegance",
113
+ "identifier": "anthropic_serializeComplete_tool_index_hardcoded",
114
+ "summary": "Anthropic serializeComplete passes hardcoded index 0 for all tool IDs, causing ID collisions",
115
+ "related_files": [
116
+ "src/formats/anthropic/serialize.ts"
117
+ ],
118
+ "evidence": [
119
+ "Line 152: `(reply.tools ?? []).map((tool) => ({ ... id: toolId(tool, 'toolu', 0), ... }))` -- every tool in the array gets index 0 passed to toolId. When multiple tools are present and none have explicit IDs, they all get the same generated ID (same prefix, same timestamp, same index). The streaming path (toolBlocks, line 65) correctly uses `startIndex + i` for each tool."
120
+ ],
121
+ "suggestion": "Change line 152 to use the map index: `(reply.tools ?? []).map((tool, i) => ({ ... id: toolId(tool, 'toolu', i), ... }))`",
122
+ "confidence": "high"
123
+ },
124
+ {
125
+ "dimension": "low_level_elegance",
126
+ "identifier": "responses_serializeComplete_tool_index_hardcoded",
127
+ "summary": "Responses serializeComplete also uses hardcoded index 0 for all tool call IDs",
128
+ "related_files": [
129
+ "src/formats/responses/serialize.ts"
130
+ ],
131
+ "evidence": [
132
+ "Lines 291-300: `(reply.tools ?? []).map((tool) => { const callId = toolId(tool, 'call', 0); ... })` -- same issue as Anthropic. When multiple tools lack explicit IDs, they all get the same generated ID. The streaming path (toolStreamBlock) correctly uses the incrementing `i` variable."
133
+ ],
134
+ "suggestion": "Change to use the map index: `(reply.tools ?? []).map((tool, i) => { const callId = toolId(tool, 'call', i); ... })`",
135
+ "confidence": "high"
136
+ },
137
+ {
138
+ "dimension": "mid_level_elegance",
139
+ "identifier": "sequence_resolver_mutates_rule_options",
140
+ "summary": "createSequenceResolver mutates rule.options on each call, creating a side-channel between the resolver and the route handler",
141
+ "related_files": [
142
+ "src/rule-engine.ts",
143
+ "src/route-handler.ts"
144
+ ],
145
+ "evidence": [
146
+ "In createSequenceResolver (lines 91-107), the returned resolver function mutates `rule.options` on each invocation (line 102: `rule.options = step.options ?? {}`). In route-handler.ts line 119, `effectiveOptions` is computed as `{ ...defaultOptions, ...matched?.options }` AFTER resolveReply has already run. This works because resolveReply calls matched.resolve which mutates matched.options before effectiveOptions is read. But this temporal coupling is fragile -- the reader must understand that resolveReply has a side effect on matched.options that the subsequent line depends on."
147
+ ],
148
+ "suggestion": "Return the per-step options from the resolver instead of mutating the rule object. For example, have the resolver return `{ reply, options }` and let resolveReply propagate both values. This makes the data flow explicit rather than relying on mutation timing.",
149
+ "confidence": "medium"
150
+ },
151
+ {
152
+ "dimension": "design_coherence",
153
+ "identifier": "mock_server_when_builds_rule_handle_inline",
154
+ "summary": "MockServer.when() constructs PendingRule and RuleHandle inline with closure-captured state, mixing builder logic into the server",
155
+ "related_files": [
156
+ "src/mock-server.ts"
157
+ ],
158
+ "evidence": [
159
+ "Lines 103-133: The when() method constructs a PendingRule object with reply() and replySequence() methods inline. The makeHandle closure creates RuleHandle objects. The replySequence method (lines 121-132) duplicates logic from the loader's addSequenceRule (loader.ts lines 108-131) -- both normalize sequence entries, call engine.add, then call createSequenceResolver and patch the rule. This is two implementations of the same concept."
160
+ ],
161
+ "suggestion": "Extract the sequence-rule creation into a shared function (e.g. in rule-engine.ts: `addSequenceRule(engine, match, steps)`) and call it from both MockServer.when().replySequence() and loader.ts addSequenceRule(). This eliminates the duplicated normalization and rule-patching logic.",
162
+ "confidence": "medium"
163
+ },
164
+ {
165
+ "dimension": "design_coherence",
166
+ "identifier": "route_handler_function_does_too_much",
167
+ "summary": "createRouteHandler's returned handler function performs parsing, matching, resolving, error handling, streaming decision, and response sending",
168
+ "related_files": [
169
+ "src/route-handler.ts"
170
+ ],
171
+ "evidence": [
172
+ "The handler function (lines 63-147) is 85 lines and handles: (1) header extraction, (2) request parsing with Zod validation, (3) rule matching, (4) reply resolution, (5) error reply handling, (6) history recording, (7) streaming vs. non-streaming decision, (8) logging, (9) SSE writing. While each step is relatively straightforward, the single function accumulates all orchestration responsibilities."
173
+ ],
174
+ "suggestion": "The function is cohesive enough that splitting it could be worse than the status quo. However, consider extracting the header-extraction and request-parsing into a helper (e.g. `parseIncomingRequest(format, request)`) to reduce the cognitive load of the main handler. The current structure is functional but approaches the point where a new engineer would need to read the entire function to understand any part of it.",
175
+ "confidence": "low"
176
+ },
177
+ {
178
+ "dimension": "ai_generated_debt",
179
+ "identifier": "type_files_high_comment_ratio",
180
+ "summary": "Type definition files have disproportionately high comment ratios (31-32%) relative to codebase average (4.2%)",
181
+ "related_files": [
182
+ "src/types/request.ts",
183
+ "src/types/rule.ts"
184
+ ],
185
+ "evidence": [
186
+ "src/types/request.ts has 32% comment ratio vs 4.2% codebase average. src/types/rule.ts has 31% comment ratio. Many comments restate what the type signature already communicates. For example, in request.ts: `readonly lastMessage: string` has comment `/** The last user message's text. This is what most matchers check. */` -- the second sentence adds value but the first sentence restates the name. In rule.ts: `/** Returned by `when()`. Call `.reply()` or `.replySequence()` on it to complete the rule. */` on PendingRule -- the interface definition below makes this obvious."
187
+ ],
188
+ "suggestion": "Trim JSDoc comments on types to only include non-obvious information. Keep comments that explain 'why' or usage guidance (like 'This is what most matchers check'), but remove comments that restate the type name or signature. For example, `readonly tools?: readonly ToolDef[] | undefined;` needs no doc comment; the name and type are self-documenting.",
189
+ "confidence": "medium"
190
+ },
191
+ {
192
+ "dimension": "incomplete_migration",
193
+ "identifier": "single_require_in_esm_codebase",
194
+ "summary": "cli.ts uses createRequire to read package.json version, the only require() in an otherwise pure ESM codebase",
195
+ "related_files": [
196
+ "src/cli.ts"
197
+ ],
198
+ "evidence": [
199
+ "Line 4: `import { createRequire } from 'node:module'` and line 17: `const require = createRequire(import.meta.url); const { version } = require('../package.json') as { version: string };`. The entire codebase is ESM (type: module in package.json, all other imports use ESM syntax). This is the sole use of require()."
200
+ ],
201
+ "suggestion": "Use an import assertion/attribute to load the package.json: `import pkg from '../package.json' with { type: 'json' };` (Node 22+ supports this). This eliminates the require shim and makes the codebase fully ESM.",
202
+ "confidence": "medium"
203
+ },
204
+ {
205
+ "dimension": "cross_module_architecture",
206
+ "identifier": "dynamic_import_of_loader_in_server",
207
+ "summary": "MockServer.load() uses a dynamic import for loader.js, creating a hidden runtime dependency that static analysis cannot trace",
208
+ "related_files": [
209
+ "src/mock-server.ts",
210
+ "src/loader.ts"
211
+ ],
212
+ "evidence": [
213
+ "Line 187: `const { loadRulesFromPath } = await import('./loader.js');`. This dynamic import means loader.ts is not in the static dependency graph of mock-server.ts. While this may be intentional for tree-shaking (users who never call load() don't pay for the loader code), it creates an invisible architecture edge that tools and developers cannot see without reading the implementation."
214
+ ],
215
+ "suggestion": "Add a comment explaining the intent (tree-shaking/code-splitting). If tree-shaking is not a goal, convert to a static import. If it is intentional, document this in the module's JSDoc: `/** Dynamically imports loader.js to keep it out of the bundle for users who don't use file-based rules. */`",
216
+ "confidence": "medium"
217
+ },
218
+ {
219
+ "dimension": "mid_level_elegance",
220
+ "identifier": "cli_watch_debounce_races",
221
+ "summary": "CLI watch mode uses a boolean flag for debouncing that can miss rapid successive changes",
222
+ "related_files": [
223
+ "src/cli.ts"
224
+ ],
225
+ "evidence": [
226
+ "Lines 88-103: The watch handler uses a `reloading` boolean flag and setTimeout with WATCH_DEBOUNCE_MS=100. If a change fires while reloading is true (during the 100ms timeout or during the async reload), it is silently dropped. A rapid sequence of saves could result in the first change being loaded and the last (most current) being missed entirely, leaving stale rules loaded."
227
+ ],
228
+ "suggestion": "Use a proper debounce pattern that resets the timer on each new event, ensuring the final change is always processed: store the timeout ID and clear/reset it on each watch callback. Example: `let timer: NodeJS.Timeout | undefined; watch(..., () => { clearTimeout(timer); timer = setTimeout(async () => { ... }, WATCH_DEBOUNCE_MS); });`",
229
+ "confidence": "high"
230
+ },
231
+ {
232
+ "dimension": "high_level_elegance",
233
+ "identifier": "format_interface_serializeError_status_unused",
234
+ "summary": "Format.serializeError receives status but all three implementations ignore it in the response body",
235
+ "related_files": [
236
+ "src/formats/types.ts",
237
+ "src/formats/openai/serialize.ts",
238
+ "src/formats/anthropic/serialize.ts",
239
+ "src/formats/responses/serialize.ts"
240
+ ],
241
+ "evidence": [
242
+ "The Format interface (types.ts line 25) defines serializeError with `status: number` in the parameter. But all three implementations ignore the status field when constructing the error body -- OpenAI (serialize.ts lines 134-146) only uses message and type, Anthropic (serialize.ts lines 170-179) only uses message and type, Responses (serialize.ts lines 315-328) only uses message and type. The status is used by route-handler.ts (line 113) to set the HTTP status code, not by the serializer. The parameter creates a false expectation that the serializer should use it."
243
+ ],
244
+ "suggestion": "Remove `status` from the serializeError parameter signature in the Format interface and all implementations, since it is only used by the route handler for the HTTP status code, not by the serialization logic. This makes the boundary clearer.",
245
+ "confidence": "medium"
246
+ },
247
+ {
248
+ "dimension": "abstraction_fitness",
249
+ "identifier": "buildMockRequest_seven_params",
250
+ "summary": "buildMockRequest takes 7 positional parameters, making call sites hard to read",
251
+ "related_files": [
252
+ "src/formats/request-helpers.ts"
253
+ ],
254
+ "evidence": [
255
+ "buildMockRequest (lines 25-51) accepts: format, body, messages, tools, defaultModel, raw, meta. All three callers (openai/parse.ts:35, anthropic/parse.ts:58, responses/parse.ts:61) pass 7 positional arguments. The call sites read like `buildMockRequest('openai', req, parseMessages(req), parseTools(req), 'gpt-5.4', body, meta)` which requires counting positions to understand what each argument is."
256
+ ],
257
+ "suggestion": "Group the parameters into an options object: `buildMockRequest({ format: 'openai', body: req, messages: parseMessages(req), tools: parseTools(req), defaultModel: 'gpt-5.4', raw: body, meta })`. This is a common pattern for functions with more than 4-5 parameters and makes call sites self-documenting.",
258
+ "confidence": "medium"
259
+ },
260
+ {
261
+ "dimension": "package_organization",
262
+ "identifier": "test_helpers_not_in_dedicated_dir",
263
+ "summary": "Test helper (make-req.ts) is in test/helpers/ but format-specific test helpers are inline in each test file",
264
+ "related_files": [
265
+ "test/helpers/make-req.ts",
266
+ "test/formats/openai.test.ts",
267
+ "test/formats/anthropic.test.ts"
268
+ ],
269
+ "evidence": [
270
+ "test/helpers/make-req.ts is a shared fixture factory extracted to its own directory",
271
+ "Format test files each define their own inline parse() helper rather than sharing one",
272
+ "Minor inconsistency in where test utilities live"
273
+ ],
274
+ "suggestion": "Consider extracting common test parsing helpers alongside make-req.ts in test/helpers/ for consistency.",
275
+ "confidence": "low"
276
+ }
277
+ ],
278
+ "dimension_notes": {
279
+ "authorization_consistency": "This is a mock server intended for local testing. Routes are intentionally unauthenticated by design -- the server's purpose is to simulate LLM API responses in test environments. No auth gaps exist because auth is not part of the domain.",
280
+ "cross_module_architecture": "The codebase has clean dependency direction: types flow outward, formats depend on shared helpers, mock-server orchestrates. The one notable edge is the dynamic import of loader.js, which is reported as a finding. Otherwise, boundaries are well-defined.",
281
+ "convention_outlier": "The three format modules (openai, anthropic, responses) follow an identical file structure (index.ts, parse.ts, schema.ts, serialize.ts) which is excellent consistency. Minor outlier is the duplicated buildUsage function naming across serializers.",
282
+ "incomplete_migration": "Nearly fully ESM. The single createRequire usage in cli.ts is the only CJS residue.",
283
+ "package_organization": "For a project of this size (~40 files), the directory layout is well-organized with clear domain boundaries. The formats/ subdirectory with per-provider modules is clean.",
284
+ "high_level_elegance": "The decomposition is clear: types define contracts, formats handle protocol translation, rule-engine handles matching, mock-server orchestrates. The Format interface provides a clean plugin boundary. Minor concern about serializeError parameter mismatch is reported.",
285
+ "abstraction_fitness": "Abstractions are generally well-fitted. The Format interface earns its keep across three implementations. The buildMockRequest parameter count is the main concern.",
286
+ "api_surface_coherence": "The public API (MockServer class) is coherent. The mixed sync/async in cli-validators is the notable inconsistency.",
287
+ "mid_level_elegance": "Handoffs between modules are mostly clean. The sequence resolver mutation and the CLI watch debounce pattern are the notable rough edges.",
288
+ "low_level_elegance": "Implementation craft is generally good. The ID generation collision risk and the hardcoded tool index in serializeComplete are the main issues.",
289
+ "design_coherence": "Functions are mostly focused. The duplicated sequence-rule logic between MockServer.when() and loader.ts is the clearest design coherence issue.",
290
+ "ai_generated_debt": "The codebase is generally clean of AI-generated patterns. The type files have somewhat elevated comment ratios with some restating comments, but the comments do generally add value.",
291
+ "error_consistency": "Error handling is functional but inconsistent in a few places: resolver errors are silently swallowed with fallback, unknown file extensions are silently skipped, and history recording timing is inconsistent between error and success paths."
292
+ },
293
+ "provenance": {
294
+ "kind": "blind_review_batch_import",
295
+ "blind": true,
296
+ "runner": "claude",
297
+ "run_stamp": "ext_20260315_045546_0587ea3b",
298
+ "created_at": "2026-03-15T05:00:28+00:00",
299
+ "packet_path": "/Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/review_packet_blind.json",
300
+ "packet_sha256": "1405a7ac30145db0e952bfb0b7abd92e594e863c7dd65c60dadbfe28680cf423",
301
+ "external_session_id": "ext_20260315_045546_0587ea3b"
302
+ }
303
+ }
@@ -0,0 +1,17 @@
1
+ # Claude Blind Reviewer Launch Prompt
2
+
3
+ You are an isolated blind reviewer. Do not use prior chat context, prior score history, or target-score anchoring.
4
+
5
+ Blind packet: /Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/review_packet_blind.json
6
+ Template JSON: /Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/review_result.template.json
7
+ Output JSON path: /Users/suyash.x.srijan/Documents/Personal_Projects/llm-mock-server/.desloppify/external_review_sessions/ext_20260315_045546_0587ea3b/review_result.json
8
+
9
+ Requirements:
10
+ 1. Read ONLY the blind packet and repository code.
11
+ 2. Start from the template JSON so `session.id` and `session.token` are preserved.
12
+ 3. Keep `session.id` exactly `ext_20260315_045546_0587ea3b`.
13
+ 4. Keep `session.token` exactly `ec214bb085366c91ba9d2310e32c5e2d`.
14
+ 5. Output must be valid JSON with top-level keys: session, assessments, findings.
15
+ 6. Every finding must include: dimension, identifier, summary, related_files, evidence, suggestion, confidence.
16
+ 7. Do not include provenance metadata (CLI injects canonical provenance).
17
+ 8. Return JSON only (no markdown fences).