mstro-app 0.4.28 → 0.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/dist/server/cli/headless/claude-invoker-process.d.ts.map +1 -1
  2. package/dist/server/cli/headless/claude-invoker-process.js +5 -1
  3. package/dist/server/cli/headless/claude-invoker-process.js.map +1 -1
  4. package/dist/server/cli/headless/haiku-assessments.d.ts.map +1 -1
  5. package/dist/server/cli/headless/haiku-assessments.js +20 -28
  6. package/dist/server/cli/headless/haiku-assessments.js.map +1 -1
  7. package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
  8. package/dist/server/cli/headless/stall-assessor.js +17 -3
  9. package/dist/server/cli/headless/stall-assessor.js.map +1 -1
  10. package/dist/server/cli/prompt-builders.d.ts.map +1 -1
  11. package/dist/server/cli/prompt-builders.js +35 -19
  12. package/dist/server/cli/prompt-builders.js.map +1 -1
  13. package/dist/server/mcp/bouncer-haiku.d.ts.map +1 -1
  14. package/dist/server/mcp/bouncer-haiku.js +5 -30
  15. package/dist/server/mcp/bouncer-haiku.js.map +1 -1
  16. package/dist/server/mcp/security-analysis.d.ts.map +1 -1
  17. package/dist/server/mcp/security-analysis.js +19 -11
  18. package/dist/server/mcp/security-analysis.js.map +1 -1
  19. package/dist/server/services/deploy/headless-session-handler.d.ts.map +1 -1
  20. package/dist/server/services/deploy/headless-session-handler.js +61 -69
  21. package/dist/server/services/deploy/headless-session-handler.js.map +1 -1
  22. package/dist/server/services/pathUtils.d.ts.map +1 -1
  23. package/dist/server/services/pathUtils.js +46 -38
  24. package/dist/server/services/pathUtils.js.map +1 -1
  25. package/dist/server/services/plan/agent-loader.d.ts +20 -4
  26. package/dist/server/services/plan/agent-loader.d.ts.map +1 -1
  27. package/dist/server/services/plan/agent-loader.js +85 -16
  28. package/dist/server/services/plan/agent-loader.js.map +1 -1
  29. package/dist/server/services/plan/issue-retry.d.ts +0 -8
  30. package/dist/server/services/plan/issue-retry.d.ts.map +1 -1
  31. package/dist/server/services/plan/issue-retry.js +72 -63
  32. package/dist/server/services/plan/issue-retry.js.map +1 -1
  33. package/dist/server/services/plan/review-gate.js +16 -88
  34. package/dist/server/services/plan/review-gate.js.map +1 -1
  35. package/dist/server/services/websocket/git-handlers.d.ts.map +1 -1
  36. package/dist/server/services/websocket/git-handlers.js +6 -19
  37. package/dist/server/services/websocket/git-handlers.js.map +1 -1
  38. package/dist/server/services/websocket/git-pr-handlers.d.ts.map +1 -1
  39. package/dist/server/services/websocket/git-pr-handlers.js +5 -21
  40. package/dist/server/services/websocket/git-pr-handlers.js.map +1 -1
  41. package/dist/server/services/websocket/handlers/deploy-handlers.d.ts.map +1 -1
  42. package/dist/server/services/websocket/handlers/deploy-handlers.js +28 -33
  43. package/dist/server/services/websocket/handlers/deploy-handlers.js.map +1 -1
  44. package/dist/server/services/websocket/plan-board-handlers.d.ts.map +1 -1
  45. package/dist/server/services/websocket/plan-board-handlers.js +31 -25
  46. package/dist/server/services/websocket/plan-board-handlers.js.map +1 -1
  47. package/dist/server/services/websocket/quality-fix-agent.d.ts.map +1 -1
  48. package/dist/server/services/websocket/quality-fix-agent.js +11 -18
  49. package/dist/server/services/websocket/quality-fix-agent.js.map +1 -1
  50. package/dist/server/services/websocket/quality-review-agent.d.ts.map +1 -1
  51. package/dist/server/services/websocket/quality-review-agent.js +13 -150
  52. package/dist/server/services/websocket/quality-review-agent.js.map +1 -1
  53. package/package.json +1 -1
  54. package/server/cli/headless/claude-invoker-process.ts +5 -1
  55. package/server/cli/headless/haiku-assessments.ts +21 -28
  56. package/server/cli/headless/stall-assessor.ts +17 -3
  57. package/server/cli/prompt-builders.ts +34 -23
  58. package/server/mcp/bouncer-haiku.ts +5 -30
  59. package/server/mcp/security-analysis.ts +19 -12
  60. package/server/services/deploy/headless-session-handler.ts +75 -76
  61. package/server/services/pathUtils.ts +55 -42
  62. package/server/services/plan/agent-loader.ts +88 -15
  63. package/server/services/plan/issue-retry.ts +93 -68
  64. package/server/services/plan/review-gate.ts +13 -89
  65. package/server/services/websocket/git-handlers.ts +6 -18
  66. package/server/services/websocket/git-pr-handlers.ts +5 -20
  67. package/server/services/websocket/handlers/deploy-handlers.ts +34 -37
  68. package/server/services/websocket/plan-board-handlers.ts +36 -21
  69. package/server/services/websocket/quality-fix-agent.ts +10 -17
  70. package/server/services/websocket/quality-review-agent.ts +12 -149
@@ -97,6 +97,40 @@ paused: false
97
97
  }
98
98
  }
99
99
 
100
+ /** Update front-matter fields in the board.md file. */
101
+ function applyBoardFieldUpdates(
102
+ boardMdPath: string,
103
+ fields: Record<string, unknown>,
104
+ ): void {
105
+ let content = readFileSync(boardMdPath, 'utf-8');
106
+ for (const [key, value] of Object.entries(fields)) {
107
+ const yamlKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
108
+ content = replaceFrontMatterField(content, yamlKey, formatYamlValue(value));
109
+ }
110
+ writeFileSync(boardMdPath, content, 'utf-8');
111
+ }
112
+
113
+ /** Sync the review-custom agent file when review criteria change. */
114
+ function syncReviewCriteriaAgent(
115
+ fields: Record<string, unknown>,
116
+ pmDir: string,
117
+ boardId: string,
118
+ ): void {
119
+ if (!('reviewCriteria' in fields)) return;
120
+
121
+ const boardDir = join(pmDir, 'boards', boardId);
122
+ const agentsDir = join(boardDir, 'agents');
123
+ const agentPath = join(agentsDir, 'review-custom.md');
124
+ const criteriaValue = String(fields.reviewCriteria ?? '').trim();
125
+
126
+ if (criteriaValue) {
127
+ if (!existsSync(agentsDir)) mkdirSync(agentsDir, { recursive: true });
128
+ writeFileSync(agentPath, buildBoardReviewAgent(criteriaValue), 'utf-8');
129
+ } else if (existsSync(agentPath)) {
130
+ try { unlinkSync(agentPath); } catch { /* non-fatal */ }
131
+ }
132
+ }
133
+
100
134
  export function handleUpdateBoard(
101
135
  ctx: HandlerContext, ws: WSContext, msg: WebSocketMessage,
102
136
  workingDir: string, permission?: 'view',
@@ -118,30 +152,11 @@ export function handleUpdateBoard(
118
152
  return;
119
153
  }
120
154
 
121
- let content = readFileSync(boardMdPath, 'utf-8');
122
- for (const [key, value] of Object.entries(fields as Record<string, unknown>)) {
123
- const yamlKey = key.replace(/([A-Z])/g, '_$1').toLowerCase();
124
- content = replaceFrontMatterField(content, yamlKey, formatYamlValue(value));
125
- }
126
- writeFileSync(boardMdPath, content, 'utf-8');
155
+ applyBoardFieldUpdates(boardMdPath, fields as Record<string, unknown>);
127
156
 
128
157
  // When review criteria are set, also write a board-level review agent file
129
158
  // so users can discover and edit the full prompt as markdown.
130
- const typedFields = fields as Record<string, unknown>;
131
- if ('reviewCriteria' in typedFields) {
132
- const boardDir = join(pmDir, 'boards', boardId);
133
- const agentsDir = join(boardDir, 'agents');
134
- const agentPath = join(agentsDir, 'review-custom.md');
135
- const criteriaValue = String(typedFields.reviewCriteria ?? '').trim();
136
-
137
- if (criteriaValue) {
138
- if (!existsSync(agentsDir)) mkdirSync(agentsDir, { recursive: true });
139
- writeFileSync(agentPath, buildBoardReviewAgent(criteriaValue), 'utf-8');
140
- } else if (existsSync(agentPath)) {
141
- // Clear the agent file when criteria are removed
142
- try { unlinkSync(agentPath); } catch { /* non-fatal */ }
143
- }
144
- }
159
+ syncReviewCriteriaAgent(fields as Record<string, unknown>, pmDir, boardId);
145
160
 
146
161
  const boardState = parseBoardDirectory(pmDir, boardId);
147
162
  if (boardState) {
@@ -10,6 +10,7 @@
10
10
  import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
11
11
  import { HeadlessRunner } from '../../cli/headless/index.js';
12
12
  import type { ToolUseEvent } from '../../cli/headless/types.js';
13
+ import { loadSkillPrompt } from '../plan/agent-loader.js';
13
14
  import type { HandlerContext } from './handler-context.js';
14
15
  import type { QualityPersistence } from './quality-persistence.js';
15
16
  import { detectTools, runQualityScan } from './quality-service.js';
@@ -58,7 +59,7 @@ export function createToolProgressCallback(ctx: HandlerContext, ws: WSContext, r
58
59
 
59
60
  // ── Prompt ────────────────────────────────────────────────────
60
61
 
61
- function buildFixPrompt(findings: FindingForFix[], section?: string): string {
62
+ function buildFixPrompt(findings: FindingForFix[], section?: string, workingDir?: string): string {
62
63
  const filtered = section ? findings.filter((f) => f.category === section) : findings;
63
64
  const sorted = filtered.sort((a, b) => {
64
65
  const order: Record<string, number> = { critical: 0, high: 1, medium: 2, low: 3 };
@@ -73,22 +74,14 @@ function buildFixPrompt(findings: FindingForFix[], section?: string): string {
73
74
  return parts.join('\n');
74
75
  }).join('\n\n');
75
76
 
76
- return `You are a code quality fix agent. Fix the following quality issues in the codebase.
77
+ const fromSkill = loadSkillPrompt('fix-quality', {
78
+ issueList,
79
+ issueCount: String(sorted.length),
80
+ showCount: String(Math.min(30, sorted.length)),
81
+ }, workingDir);
82
+ if (fromSkill) return fromSkill;
77
83
 
78
- ## Issues to Fix (${sorted.length} total, showing top ${Math.min(30, sorted.length)})
79
-
80
- ${issueList}
81
-
82
- ## Rules
83
-
84
- - Fix each issue by editing the relevant file at the specified location.
85
- - For complexity issues: refactor into smaller functions. For long files: split or extract modules. For long functions: break into smaller functions.
86
- - For security issues: apply the suggested fix or use secure coding best practices.
87
- - For bugs: fix the root cause, not just the symptom.
88
- - For linting/formatting: apply the standard for the project.
89
- - Do NOT introduce new issues. Make minimal, focused changes.
90
- - After fixing, verify the changes compile/pass linting if tools are available.
91
- - Work through the issues systematically from most to least severe.`;
84
+ return `You are a code quality fix agent. Fix the following quality issues in the codebase.\n\n## Issues to Fix (${sorted.length} total, showing top ${Math.min(30, sorted.length)})\n\n${issueList}\n\nFix each issue by editing the relevant file. Work from most to least severe. Do NOT introduce new issues.`;
92
85
  }
93
86
 
94
87
  // ── Handler ───────────────────────────────────────────────────
@@ -128,7 +121,7 @@ export async function handleFixIssues(
128
121
  data: { path: reportPath, message: 'Starting Claude Code to fix issues...' },
129
122
  });
130
123
 
131
- const prompt = buildFixPrompt(findings, section);
124
+ const prompt = buildFixPrompt(findings, section, workingDir);
132
125
 
133
126
  const runner = new HeadlessRunner({
134
127
  workingDir: dirPath,
@@ -12,6 +12,7 @@ import { isAbsolute, join } from 'node:path';
12
12
  import { runWithFileLogger } from '../../cli/headless/headless-logger.js';
13
13
  import { HeadlessRunner } from '../../cli/headless/index.js';
14
14
  import type { ToolUseEvent } from '../../cli/headless/types.js';
15
+ import { loadSkillPrompt } from '../plan/agent-loader.js';
15
16
  import type { HandlerContext } from './handler-context.js';
16
17
  import type { QualityPersistence } from './quality-persistence.js';
17
18
  import { recomputeWithAiReview } from './quality-service.js';
@@ -39,106 +40,11 @@ export function buildCodeReviewPrompt(dirPath: string, cliFindings?: Array<{ sev
39
40
  ? `\n## CLI Tool Findings (already detected)\n\nThe following issues were found by automated CLI tools (linters, formatters, complexity analyzers). Review these for context — they are already included in the final report. Focus your analysis on DEEPER issues these tools cannot detect.\n\n${cliFindings.slice(0, 50).map((f, i) => `${i + 1}. [${f.severity.toUpperCase()}] ${f.category} — ${f.file}${f.line ? `:${f.line}` : ''} — ${f.title}: ${f.description}`).join('\n')}\n${cliFindings.length > 50 ? `\n...and ${cliFindings.length - 50} more issues from CLI tools.\n` : ''}`
40
41
  : '';
41
42
 
42
- return `You are a senior staff engineer performing a rigorous, honest code review. Your job is to surface the most impactful quality bottlenecks — the issues a principal engineer would flag in a code review. Be critical and objective. Do NOT inflate scores.
43
-
44
- IMPORTANT: Your current working directory is "${dirPath}". Only review files within this directory.
45
- ${cliFindingsSection}
46
- ## Review Process
47
-
48
- 1. **Discover**: Use Glob to find source files (e.g. "**/*.{ts,tsx,js,py,rs,go,java,rb,php}"). Understand the project structure.
49
- 2. **Read**: Read the most important files — entry points, core modules, handlers, services. Prioritize files with recent git changes (\`git diff --name-only HEAD~5\` via Bash if available).
50
- 3. **Analyze**: Look for real, actionable issues across ALL of these categories:
51
-
52
- ### Architecture
53
- - What is the current architecture (monolith, microservices, layered, etc.)?
54
- - Are there architectural violations? (e.g., presentation layer directly accessing data layer, circular dependencies between modules)
55
- - Is there proper separation of concerns?
56
- - Are there god objects or god modules that do too much?
57
-
58
- ### SOLID / OOP Principles
59
- - **SRP**: Classes/modules with multiple unrelated responsibilities
60
- - **OCP**: Code that requires modification instead of extension for new features
61
- - **LSP**: Subtypes that don't properly substitute for their base types
62
- - **ISP**: Interfaces/contracts that force implementations to depend on methods they don't use
63
- - **DIP**: High-level modules directly depending on low-level modules instead of abstractions
64
-
65
- ### Security
66
- - Injection vulnerabilities (SQL, XSS, command), hardcoded secrets/credentials, auth bypasses, insecure crypto, path traversal, SSRF, unsafe deserialization
67
-
68
- ### Bugs & Logic
69
- - Null/undefined errors, race conditions, logic errors, unhandled edge cases, off-by-one errors, resource leaks, incorrect error handling, incorrect algorithms
70
-
71
- ### Performance
72
- - N+1 queries, unnecessary re-renders, missing memoization, blocking I/O in hot paths, unbounded data structures, missing pagination
73
-
74
- ## CRITICAL — Structured Evidence Requirement
75
-
76
- For EACH finding, you MUST provide structured evidence that grounds the finding in actual code. This is required to prevent false positives.
77
-
78
- For each finding, use this reasoning process:
79
-
80
- 1. **PREMISE**: State the observable fact from the code. Quote the exact code you see.
81
- 2. **CONTEXT**: What is the surrounding code doing? Are there guards, fixes, or patterns elsewhere that might handle this?
82
- 3. **COUNTER-CHECK**: Actively look for evidence that CONTRADICTS your finding. Check for:
83
- - Guards or validation earlier in the call chain
84
- - Error handling wrapping the code
85
- - Configuration that changes behavior (e.g., NODE_ENV checks)
86
- - Comments explaining intentional design choices
87
- 4. **CONCLUSION**: Only report the finding if you could not find contradicting evidence.
88
-
89
- ### Common False Positive Patterns to AVOID
90
-
91
- - Claiming a function uses API X when it actually uses API Y (e.g., claiming Math.random() when code uses crypto.randomInt()) — ALWAYS quote the actual function call
92
- - Claiming a header/value is leaked when code already deletes/filters it — READ the full function
93
- - Claiming there's no guard when a condition check exists nearby — READ surrounding lines
94
- - Claiming N fields/methods when the actual count differs — COUNT explicitly
95
- - Claiming a resource leaks when cleanup exists in a different handler — SEARCH for the cleanup code
96
-
97
- ## Rules
98
-
99
- - Only report findings you are >90% confident about after completing the counter-check step.
100
- - Focus on architecture, SOLID violations, bugs, and security over style nits.
101
- - Each finding MUST reference a specific file and line number. Do not report vague or file-level issues.
102
- - Each finding MUST include an "evidence" field with the exact code snippet (1-5 lines) proving the issue exists.
103
- - Limit to the 25 most important findings, ranked by severity.
104
- - Do NOT modify any files. This is a read-only review.
105
- - Be HONEST about the overall quality. A codebase with serious issues should score low.
106
-
107
- ## Scoring Guidelines
108
-
109
- After your analysis, provide an honest overall quality score (0-100) and letter grade:
110
- - **A (90-100)**: Excellent — clean architecture, minimal issues, well-tested, follows best practices
111
- - **B (80-89)**: Good — solid code with minor issues, mostly well-structured
112
- - **C (70-79)**: Adequate — functional but has notable quality issues that should be addressed
113
- - **D (60-69)**: Below average — significant issues in architecture, testing, or code quality
114
- - **F (0-59)**: Poor — serious problems: security vulnerabilities, broken architecture, major bugs, or unmaintainable code
115
-
116
- Consider ALL findings (both CLI tool findings and your own) when determining the score. The score should reflect the overall state of the codebase honestly. A project with 50+ linting errors, formatting issues, complex functions, AND architectural problems should NOT score above 70.
117
-
118
- ## Output
119
-
120
- After your analysis, output EXACTLY one JSON code block with your findings. No other text after the JSON block.
121
-
122
- \`\`\`json
123
- {
124
- "score": 72,
125
- "grade": "C",
126
- "scoreRationale": "Brief explanation of why this score was given, referencing key issues",
127
- "findings": [
128
- {
129
- "severity": "critical|high|medium|low",
130
- "category": "architecture|oop|security|bugs|performance|logic",
131
- "file": "relative/path/to/file.ts",
132
- "line": 42,
133
- "title": "Short title describing the issue",
134
- "description": "What the problem is and why it matters.",
135
- "suggestion": "How to fix it.",
136
- "evidence": "const token = Math.random().toString(36) // exact code from file proving the issue"
137
- }
138
- ],
139
- "summary": "Brief 1-2 sentence summary of overall code quality."
140
- }
141
- \`\`\``;
43
+ const fromSkill = loadSkillPrompt('code-review', { dirPath, cliFindingsSection }, dirPath);
44
+ if (fromSkill) return fromSkill;
45
+
46
+ // Inline fallback when Skill file is not available (e.g., standalone CLI install)
47
+ return `You are a senior staff engineer performing a rigorous code review.\n\nIMPORTANT: Your current working directory is "${dirPath}". Only review files within this directory.\n${cliFindingsSection}\nDiscover source files with Glob, read important files, analyze for architecture, SOLID, security, bugs, and performance issues. Each finding needs file, line, evidence. Output one JSON code block with score, grade, findings array, and summary.`;
142
48
  }
143
49
 
144
50
  // ── Response parsing ──────────────────────────────────────────
@@ -343,7 +249,7 @@ export function buildVerificationPrompt(
343
249
  dirPath: string,
344
250
  findings: CodeReviewFinding[],
345
251
  ): string {
346
- const findingsJson = findings.map((f, i) => ({
252
+ const findingsJson = JSON.stringify(findings.map((f, i) => ({
347
253
  id: i + 1,
348
254
  severity: f.severity,
349
255
  category: f.category,
@@ -352,56 +258,13 @@ export function buildVerificationPrompt(
352
258
  title: f.title,
353
259
  description: f.description,
354
260
  evidence: f.evidence || '(none provided)',
355
- }));
356
-
357
- return `You are an independent code review VERIFIER. A separate reviewer produced the findings below. Your job is to VERIFY each finding against the actual code. You are a skeptic — do NOT trust the original reviewer's claims.
358
-
359
- IMPORTANT: Your current working directory is "${dirPath}". Only read files within this directory.
360
-
361
- ## Findings to Verify
362
-
363
- ${JSON.stringify(findingsJson, null, 2)}
364
-
365
- ## Verification Process
261
+ })), null, 2);
366
262
 
367
- For EACH finding:
263
+ const fromSkill = loadSkillPrompt('verify-review', { dirPath, findingsJson }, dirPath);
264
+ if (fromSkill) return fromSkill;
368
265
 
369
- 1. **Read the cited file and line** using the Read tool. Read at least 20 lines around the cited line for context.
370
- 2. **Check the specific claim** in the description. Does the code actually do what the finding claims?
371
- 3. **Search for counter-evidence**:
372
- - If the finding claims something is missing (no validation, no cleanup, no guard): search for it with Grep
373
- - If the finding claims an API is used: verify the actual API call at that line
374
- - If the finding claims a value is leaked/exposed: check if it's filtered/deleted elsewhere in the same function
375
- 4. **Verdict**: Mark as "confirmed" or "rejected" with a brief explanation
376
-
377
- ## Rules
378
-
379
- - You MUST actually Read each cited file. Do not rely on memory or assumptions.
380
- - Use Grep to search for patterns the finding claims exist (or don't exist).
381
- - A finding is "rejected" if:
382
- - The code does NOT match what the description claims
383
- - There IS a guard/fix that the finding claims is missing
384
- - The line number doesn't contain the relevant code
385
- - The finding is about a different version of the code than what exists now
386
- - A finding is "confirmed" if you can independently verify the issue exists in the current code.
387
- - Be thorough but efficient — focus verification effort on high/critical severity findings.
388
-
389
- ## Output
390
-
391
- Output EXACTLY one JSON code block. No other text after the JSON block.
392
-
393
- \`\`\`json
394
- {
395
- "verifications": [
396
- {
397
- "id": 1,
398
- "verdict": "confirmed|rejected",
399
- "confidence": 0.95,
400
- "note": "Brief explanation of what you found when checking the code"
401
- }
402
- ]
403
- }
404
- \`\`\``;
266
+ // Inline fallback
267
+ return `You are an independent code review VERIFIER. Verify each finding below against actual code in "${dirPath}".\n\n## Findings to Verify\n\n${findingsJson}\n\nFor each finding: Read the cited file, check the claim, search for counter-evidence. Output one JSON code block with verifications array containing id, verdict (confirmed|rejected), confidence, and note.`;
405
268
  }
406
269
 
407
270
  interface VerificationVerdict {