ruvnet-kb-first 6.0.0 → 6.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/SKILL.md +44 -6
  2. package/package.json +1 -1
  3. package/src/mcp-server.js +1184 -460
package/src/mcp-server.js CHANGED
@@ -1,670 +1,1393 @@
1
1
  /**
2
- * RuvNet KB-First MCP Server - Score-Driven Architecture
3
- * Version 6.0.0
2
+ * RuvNet KB-First MCP Server - Granular Score-Driven Architecture
3
+ * Version 6.2.0
4
4
  *
5
- * PHILOSOPHY: Scoring IS the enforcement mechanism.
6
- * - Every operation requires baseline score first
7
- * - Every operation shows delta (before/after)
8
- * - Hard gates BLOCK on negative deltas
9
- * - No shortcuts - rigorous measurement drives quality
5
+ * PHILOSOPHY: Granular scoring drives discipline.
6
+ * - Score each KB dimension 1-100 (completeness, depth, comprehensiveness, accuracy, freshness, attribution, ux_quality)
7
+ * - Score each phase readiness 1-100 (including Phase 12: UX Quality Review)
8
+ * - Generate enhancement plan based on gaps
9
+ * - User confirms before execution
10
+ * - Post-verify: did we hit predicted scores?
11
+ * - Playwright UX Review: Visual quality audit from end-user perspective
10
12
  *
11
- * 4 Tools (not 7):
12
- * 1. kb_first_assess - Calculate baseline scores (KB + App + Process)
13
- * 2. kb_first_phase - Execute phase work with delta tracking
14
- * 3. kb_first_delta - Explicit before/after comparison
15
- * 4. kb_first_gate - Hard gate that blocks on negative delta
13
+ * 6 Tools:
14
+ * 1. kb_first_assess - Score ALL dimensions (KB quality + phase readiness)
15
+ * 2. kb_first_plan - Generate enhancement plan with predicted improvements
16
+ * 3. kb_first_confirm - User confirms readiness, locks in plan
17
+ * 4. kb_first_execute - Execute plan phase by phase
18
+ * 5. kb_first_verify - Post-verification: predicted vs actual, identify gaps
19
+ * 6. kb_first_ux_review - Playwright-based UX quality audit with screenshots
16
20
  *
17
21
  * Usage:
18
22
  * npx ruvnet-kb-first mcp
19
- * node src/mcp-server.js
20
23
  */
21
24
 
22
25
  import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, statSync } from 'fs';
23
26
  import { join } from 'path';
24
27
  import { globSync } from 'glob';
25
28
 
26
- // MCP Protocol Constants
27
29
  const MCP_VERSION = '0.1.0';
28
30
  const SERVER_NAME = 'ruvnet-kb-first';
29
- const SERVER_VERSION = '6.0.0';
31
+ const SERVER_VERSION = '6.2.0';
30
32
 
31
33
  /**
32
- * Score Categories (total 100 points)
33
- * These are the ONLY metrics that matter
34
+ * KB Quality Dimensions (each scored 1-100)
34
35
  */
35
- const SCORE_WEIGHTS = {
36
- kb: {
37
- weight: 40,
38
- components: {
39
- entries: 10, // KB has content
40
- coverage: 10, // Domain coverage completeness
41
- embeddings: 10, // Vectors generated
42
- freshness: 10 // Recent updates
43
- }
36
+ const KB_DIMENSIONS = {
37
+ completeness: {
38
+ name: 'Completeness',
39
+ description: 'Does the KB cover all necessary domain topics?',
40
+ weight: 20
44
41
  },
45
- app: {
46
- weight: 40,
47
- components: {
48
- kbCitations: 15, // Code files cite KB sources
49
- gapResolution: 10, // Gaps identified and resolved
50
- testCoverage: 10, // Tests exist and pass
51
- security: 5 // Security basics in place
52
- }
42
+ depth: {
43
+ name: 'Depth',
44
+ description: 'Is each topic covered with sufficient detail?',
45
+ weight: 20
53
46
  },
54
- process: {
55
- weight: 20,
56
- components: {
57
- phaseCompletion: 10, // Phases properly completed
58
- gatesPassed: 5, // Hard gates verified
59
- documentation: 5 // Docs exist
60
- }
47
+ comprehensiveness: {
48
+ name: 'Comprehensiveness',
49
+ description: 'Are edge cases, exceptions, and nuances included?',
50
+ weight: 20
51
+ },
52
+ accuracy: {
53
+ name: 'Accuracy',
54
+ description: 'Is the information correct and up-to-date?',
55
+ weight: 20
56
+ },
57
+ freshness: {
58
+ name: 'Freshness',
59
+ description: 'How recently was the KB updated?',
60
+ weight: 10
61
+ },
62
+ attribution: {
63
+ name: 'Attribution',
64
+ description: 'Are sources and experts properly cited?',
65
+ weight: 10
66
+ },
67
+ ux_quality: {
68
+ name: 'UX Quality',
69
+ description: 'Is the user experience excellent? Visual design, emotional appeal, user flow, versioning display, loading states, error handling, accessibility.',
70
+ weight: 15
61
71
  }
62
72
  };
63
73
 
64
74
  /**
65
- * Phase definitions
75
+ * Phase Definitions with readiness criteria
66
76
  */
67
77
  const PHASES = {
68
- 0: { name: 'Assessment', gate: 'assessment_documented' },
69
- 1: { name: 'KB Design', gate: 'schema_designed' },
70
- 1.5: { name: 'Hooks Setup', gate: 'hooks_verified' },
71
- 2: { name: 'Schema Definition', gate: 'schema_created' },
72
- 3: { name: 'KB Population', gate: 'kb_score_50' },
73
- 4: { name: 'Scoring & Gaps', gate: 'kb_score_80' },
74
- 5: { name: 'Integration', gate: 'integration_tested' },
75
- 6: { name: 'Testing', gate: 'tests_passing' },
76
- 7: { name: 'Optimization', gate: 'performance_met' },
77
- 7.5: { name: 'Testing Gate', gate: 'coverage_80' },
78
- 8: { name: 'Verification', gate: 'all_checks_pass' },
79
- 9: { name: 'Security', gate: 'security_audit_passed' },
80
- 10: { name: 'Documentation', gate: 'docs_complete' },
81
- 11: { name: 'Deployment', gate: 'deployed' },
82
- 11.5: { name: 'Observability', gate: 'monitoring_active' },
83
- 12: { name: 'KB Operations', gate: 'operations_ready' }
78
+ 0: {
79
+ name: 'Assessment',
80
+ criteria: ['Project scope documented', 'Domain complexity identified', 'KB-First suitability confirmed', 'Resources estimated']
81
+ },
82
+ 1: {
83
+ name: 'KB Design',
84
+ criteria: ['Domain concepts mapped', 'Taxonomy designed', 'Relationships defined', 'Query patterns planned']
85
+ },
86
+ 1.5: {
87
+ name: 'Hooks Setup',
88
+ criteria: ['Hooks installed', 'Configuration complete', 'Patterns trained', 'Verification passing']
89
+ },
90
+ 2: {
91
+ name: 'Schema Definition',
92
+ criteria: ['Tables created', 'Vector columns added', 'Indexes designed', 'Migrations written']
93
+ },
94
+ 3: {
95
+ name: 'KB Population',
96
+ criteria: ['Content collected', 'Data cleaned', 'Embeddings generated', 'Import validated']
97
+ },
98
+ 4: {
99
+ name: 'Scoring & Gaps',
100
+ criteria: ['Coverage analyzed', 'Quality scored', 'Gaps identified', 'Remediation planned']
101
+ },
102
+ 5: {
103
+ name: 'Integration',
104
+ criteria: ['Search API built', 'Code generation working', 'Citation system active', 'Gap logging enabled']
105
+ },
106
+ 6: {
107
+ name: 'Testing',
108
+ criteria: ['Unit tests written', 'Integration tests passing', 'Accuracy validated', 'Edge cases covered']
109
+ },
110
+ 7: {
111
+ name: 'Optimization',
112
+ criteria: ['Queries optimized', 'Indexes tuned', 'Caching implemented', 'Benchmarks passing']
113
+ },
114
+ 8: {
115
+ name: 'Verification',
116
+ criteria: ['Code scan clean', 'Imports verified', 'Sources return', 'Startup working', 'Fallbacks tested', 'Attribution valid', 'Confidence scores present', 'Gap logging active']
117
+ },
118
+ 9: {
119
+ name: 'Security',
120
+ criteria: ['Dependencies audited', 'OWASP checked', 'SQL injection tested', 'Auth reviewed', 'Secrets secured', 'APIs protected']
121
+ },
122
+ 10: {
123
+ name: 'Documentation',
124
+ criteria: ['README complete', 'API documented', 'Schema documented', 'Architecture documented', 'Operator guide written']
125
+ },
126
+ 11: {
127
+ name: 'Deployment',
128
+ criteria: ['Infrastructure ready', 'Environments configured', 'CI/CD built', 'Migrations run', 'Monitoring active', 'Go-live complete']
129
+ },
130
+ 12: {
131
+ name: 'UX Quality Review',
132
+ criteria: [
133
+ 'Version displayed in header/footer (major.minor.patch)',
134
+ 'Cache-busting implemented with version notifications',
135
+ 'Visual design excellence (not just functional)',
136
+ 'Emotional appeal and user psychology considered',
137
+ 'Loading states are elegant',
138
+ 'Error messages are helpful and actionable',
139
+ 'User flow is intuitive and compelling',
140
+ 'Playwright screenshots reviewed and critiqued',
141
+ 'Accessibility verified (WCAG 2.1 AA)',
142
+ 'Mobile responsiveness tested'
143
+ ]
144
+ }
84
145
  };
85
146
 
86
147
  /**
87
- * MCP Tools - Score-Driven Architecture
148
+ * MCP Tools
88
149
  */
89
150
  const TOOLS = [
90
151
  {
91
152
  name: 'kb_first_assess',
92
- description: `Calculate comprehensive baseline scores for KB, App, and Process.
93
- ALWAYS RUN THIS FIRST before any work. Returns:
94
- - KB Score (40 points): entries, coverage, embeddings, freshness
95
- - App Score (40 points): citations, gap resolution, tests, security
96
- - Process Score (20 points): phases, gates, documentation
97
- - Total (100 points)
98
-
99
- This becomes your BASELINE for delta comparison.`,
153
+ description: `Score ALL dimensions of KB quality and phase readiness (each 1-100).
154
+
155
+ KB Quality Dimensions:
156
+ - Completeness: Does KB cover all domain topics?
157
+ - Depth: Is each topic detailed enough?
158
+ - Comprehensiveness: Are edge cases included?
159
+ - Accuracy: Is information correct?
160
+ - Freshness: How recently updated?
161
+ - Attribution: Are sources cited?
162
+
163
+ Phase Readiness:
164
+ - Each of 12 phases scored 1-100 based on criteria completion
165
+
166
+ Returns granular scores that reveal exactly where gaps exist.
167
+ This is your BASELINE for planning.`,
100
168
  inputSchema: {
101
169
  type: 'object',
102
170
  properties: {
103
- detailed: { type: 'boolean', description: 'Show component breakdown', default: true },
104
- saveBaseline: { type: 'boolean', description: 'Save as baseline for delta comparison', default: true }
171
+ projectPath: { type: 'string', description: 'Path to project (default: current directory)' }
105
172
  }
106
173
  }
107
174
  },
108
175
  {
109
- name: 'kb_first_phase',
110
- description: `Execute a phase with automatic delta tracking.
111
- REQUIRES: kb_first_assess must be run first to establish baseline.
176
+ name: 'kb_first_plan',
177
+ description: `Generate enhancement plan based on assessment scores.
112
178
 
113
- Workflow:
114
- 1. Loads baseline score from last kb_first_assess
115
- 2. Shows phase requirements and sub-phases
116
- 3. Returns guidance for completing the phase
117
- 4. REMINDS you to run kb_first_delta when done
179
+ Analyzes gaps (scores below threshold) and creates:
180
+ - Prioritized list of enhancements
181
+ - Predicted score improvements for each
182
+ - Estimated effort
183
+ - Execution order
118
184
 
119
- Will WARN if baseline is stale (>1 hour old).`,
185
+ The plan gives you a concrete game plan so you don't lose the thread.
186
+ Returns the plan for user review before execution.`,
120
187
  inputSchema: {
121
188
  type: 'object',
122
189
  properties: {
123
- phase: { type: 'number', description: 'Phase number (0-12, including 1.5, 7.5, 11.5)' }
190
+ threshold: { type: 'number', description: 'Minimum acceptable score (default: 80)', default: 80 },
191
+ focusArea: { type: 'string', enum: ['kb', 'phases', 'all'], description: 'What to focus on', default: 'all' }
192
+ }
193
+ }
194
+ },
195
+ {
196
+ name: 'kb_first_confirm',
197
+ description: `User confirms readiness to execute enhancement plan.
198
+
199
+ Shows the plan summary and asks for confirmation.
200
+ Once confirmed, the plan is locked and execution can begin.
201
+
202
+ This ensures user consent before making changes.`,
203
+ inputSchema: {
204
+ type: 'object',
205
+ properties: {
206
+ confirmed: { type: 'boolean', description: 'User confirms readiness to proceed' }
124
207
  },
125
- required: ['phase']
208
+ required: ['confirmed']
126
209
  }
127
210
  },
128
211
  {
129
- name: 'kb_first_delta',
130
- description: `Compare current scores against baseline. THE ENFORCEMENT MECHANISM.
131
- Shows:
132
- - Baseline score (from kb_first_assess)
133
- - Current score (calculated now)
134
- - Delta (+ improvement or - regression)
135
- - VERDICT: PASS (positive delta) or FAIL (negative delta)
136
-
137
- If delta is negative, you CANNOT proceed to next phase.
138
- This prevents shortcuts and enforces rigor.`,
212
+ name: 'kb_first_execute',
213
+ description: `Execute the confirmed enhancement plan.
214
+
215
+ Works through the plan systematically:
216
+ - Shows current task
217
+ - Provides guidance for completion
218
+ - Tracks progress
219
+ - Updates predicted scores
220
+
221
+ Call repeatedly to work through each enhancement.`,
139
222
  inputSchema: {
140
223
  type: 'object',
141
224
  properties: {
142
- showBreakdown: { type: 'boolean', description: 'Show which components changed', default: true }
225
+ taskComplete: { type: 'boolean', description: 'Mark current task as complete' }
143
226
  }
144
227
  }
145
228
  },
146
229
  {
147
- name: 'kb_first_gate',
148
- description: `Hard gate check for phase transition.
149
- BLOCKS progress if:
150
- - Delta is negative (score dropped)
151
- - Required gate condition not met
152
- - Baseline not established
153
-
154
- Returns:
155
- - canProceed: boolean
156
- - blockReason: string (if blocked)
157
- - nextPhase: number (if can proceed)
158
-
159
- THIS IS THE HARD GATE. No bypassing.`,
230
+ name: 'kb_first_verify',
231
+ description: `Post-verification: Compare predicted vs actual scores.
232
+
233
+ Re-scores everything and compares to predictions:
234
+ - Which improvements were achieved?
235
+ - Which fell short?
236
+ - What gaps remain?
237
+ - What's the next priority?
238
+
239
+ This closes the loop and ensures you delivered what you promised.`,
240
+ inputSchema: {
241
+ type: 'object',
242
+ properties: {
243
+ detailed: { type: 'boolean', description: 'Show detailed comparison', default: true }
244
+ }
245
+ }
246
+ },
247
+ {
248
+ name: 'kb_first_ux_review',
249
+ description: `Playwright-based UX Quality Audit - End-user perspective review.
250
+
251
+ Walks through the application as an end user, capturing screenshots.
252
+ Then critically reviews EACH screenshot:
253
+
254
+ For each screen:
255
+ - How good is this? (1-100 score)
256
+ - How could we make it better? (specific recommendations)
257
+ - Where is it falling down? (issues identified)
258
+ - What would EXCELLENT look like? (vision for improvement)
259
+ - Recommendations with priority
260
+
261
+ Also checks critical UX requirements:
262
+ - Version number displayed in header/footer (major.minor.patch)
263
+ - Cache-busting with version change notifications
264
+ - Loading states are elegant, not jarring
265
+ - Error messages are helpful and guide user to resolution
266
+ - Visual design creates emotional appeal, not just functional
267
+ - User psychology leveraged for compelling flow
268
+
269
+ This ensures the application isn't just functional—it's EXCELLENT.`,
160
270
  inputSchema: {
161
271
  type: 'object',
162
272
  properties: {
163
- phase: { type: 'number', description: 'Phase to verify gate for' }
273
+ appUrl: { type: 'string', description: 'URL of the running application to review' },
274
+ flows: {
275
+ type: 'array',
276
+ items: { type: 'string' },
277
+ description: 'User flows to test (e.g., ["login", "search", "checkout"])',
278
+ default: ['homepage', 'main_flow']
279
+ },
280
+ screenshotDir: {
281
+ type: 'string',
282
+ description: 'Directory to save screenshots',
283
+ default: '.ruvector/ux-review'
284
+ },
285
+ criticalReview: {
286
+ type: 'boolean',
287
+ description: 'Perform deep critical review of each screenshot',
288
+ default: true
289
+ }
164
290
  },
165
- required: ['phase']
291
+ required: ['appUrl']
166
292
  }
167
293
  }
168
294
  ];
169
295
 
170
296
  /**
171
- * Calculate all scores
297
+ * Score KB Quality Dimensions (1-100 each)
172
298
  */
173
- function calculateScores(cwd) {
174
- const scores = {
175
- kb: { total: 0, max: 40, components: {} },
176
- app: { total: 0, max: 40, components: {} },
177
- process: { total: 0, max: 20, components: {} },
178
- total: 0,
179
- max: 100,
180
- grade: 'F',
181
- timestamp: new Date().toISOString()
182
- };
183
-
184
- // ===== KB SCORE (40 points) =====
185
- const ruvectorDir = join(cwd, '.ruvector');
299
+ function scoreKBDimensions(cwd) {
300
+ const scores = {};
186
301
  const kbDir = join(cwd, 'src', 'kb');
302
+ const docsDir = join(cwd, 'docs');
303
+ const ruvectorDir = join(cwd, '.ruvector');
187
304
 
188
- // KB Entries (10 points)
305
+ // Count KB entries and docs
189
306
  let kbEntries = 0;
307
+ let docFiles = 0;
308
+ let totalContent = 0;
309
+
190
310
  if (existsSync(kbDir)) {
191
311
  try {
192
312
  const files = readdirSync(kbDir);
193
313
  kbEntries = files.length;
314
+ for (const f of files) {
315
+ try {
316
+ const content = readFileSync(join(kbDir, f), 'utf-8');
317
+ totalContent += content.length;
318
+ } catch {}
319
+ }
194
320
  } catch {}
195
321
  }
196
- scores.kb.components.entries = Math.min(10, Math.floor(kbEntries / 5) * 2);
197
322
 
198
- // KB Coverage (10 points) - based on documented domains
199
- const docsDir = join(cwd, 'docs');
200
- let domainDocs = 0;
201
323
  if (existsSync(docsDir)) {
202
324
  try {
203
- const files = readdirSync(docsDir);
204
- domainDocs = files.filter(f => f.endsWith('.md')).length;
325
+ docFiles = readdirSync(docsDir).filter(f => f.endsWith('.md')).length;
205
326
  } catch {}
206
327
  }
207
- scores.kb.components.coverage = Math.min(10, domainDocs * 2);
208
328
 
209
- // KB Embeddings (10 points) - check for vector files or config
210
- const configPath = join(ruvectorDir, 'config.json');
211
- let hasEmbeddings = false;
212
- if (existsSync(configPath)) {
329
+ // Completeness: Based on number of KB entries and docs
330
+ // 0 entries = 0, 5 entries = 50, 10+ entries = 100
331
+ scores.completeness = {
332
+ score: Math.min(100, Math.max(0, kbEntries * 10 + docFiles * 10)),
333
+ reason: `${kbEntries} KB entries, ${docFiles} doc files`,
334
+ improvement: kbEntries < 10 ? `Add ${10 - kbEntries} more KB entries` : 'Adequate coverage'
335
+ };
336
+
337
+ // Depth: Based on average content length
338
+ // < 500 chars avg = shallow, > 2000 = deep
339
+ const avgLength = kbEntries > 0 ? totalContent / kbEntries : 0;
340
+ scores.depth = {
341
+ score: Math.min(100, Math.max(0, Math.round(avgLength / 20))),
342
+ reason: `Average entry length: ${Math.round(avgLength)} chars`,
343
+ improvement: avgLength < 2000 ? 'Add more detail to KB entries' : 'Good depth'
344
+ };
345
+
346
+ // Comprehensiveness: Check for edge case documentation
347
+ let edgeCaseScore = 0;
348
+ const srcDir = join(cwd, 'src');
349
+ if (existsSync(srcDir)) {
213
350
  try {
214
- const config = JSON.parse(readFileSync(configPath, 'utf-8'));
215
- hasEmbeddings = config.kbFirst?.embeddings === true || kbEntries > 0;
351
+ const files = globSync('**/*.{ts,tsx,js,jsx,py}', { cwd: srcDir });
352
+ for (const f of files) {
353
+ try {
354
+ const content = readFileSync(join(srcDir, f), 'utf-8');
355
+ if (content.includes('edge case') || content.includes('exception') || content.includes('fallback')) {
356
+ edgeCaseScore += 10;
357
+ }
358
+ } catch {}
359
+ }
216
360
  } catch {}
217
361
  }
218
- scores.kb.components.embeddings = hasEmbeddings ? 10 : 0;
362
+ scores.comprehensiveness = {
363
+ score: Math.min(100, edgeCaseScore + (kbEntries * 5)),
364
+ reason: `Edge case handling detected in ${Math.floor(edgeCaseScore / 10)} files`,
365
+ improvement: edgeCaseScore < 50 ? 'Document edge cases and exceptions' : 'Good coverage'
366
+ };
367
+
368
+ // Accuracy: Based on presence of verification/testing
369
+ let accuracyScore = 50; // Base score
370
+ if (existsSync(join(cwd, 'tests')) || existsSync(join(cwd, '__tests__'))) accuracyScore += 25;
371
+ if (existsSync(join(cwd, '.ruvector', 'config.json'))) accuracyScore += 15;
372
+ if (existsSync(join(cwd, 'CHANGELOG.md'))) accuracyScore += 10;
373
+ scores.accuracy = {
374
+ score: Math.min(100, accuracyScore),
375
+ reason: accuracyScore > 75 ? 'Tests and verification present' : 'Limited verification',
376
+ improvement: accuracyScore < 80 ? 'Add tests and validation' : 'Good accuracy controls'
377
+ };
219
378
 
220
- // KB Freshness (10 points) - recent updates
221
- let freshness = 0;
379
+ // Freshness: Based on last modification
380
+ let freshnessScore = 0;
222
381
  if (existsSync(ruvectorDir)) {
223
382
  try {
224
383
  const stat = statSync(ruvectorDir);
225
- const daysSinceUpdate = (Date.now() - stat.mtime.getTime()) / (1000 * 60 * 60 * 24);
226
- if (daysSinceUpdate < 1) freshness = 10;
227
- else if (daysSinceUpdate < 7) freshness = 7;
228
- else if (daysSinceUpdate < 30) freshness = 4;
229
- else freshness = 0;
384
+ const daysSince = (Date.now() - stat.mtime.getTime()) / (1000 * 60 * 60 * 24);
385
+ if (daysSince < 1) freshnessScore = 100;
386
+ else if (daysSince < 7) freshnessScore = 80;
387
+ else if (daysSince < 30) freshnessScore = 50;
388
+ else if (daysSince < 90) freshnessScore = 25;
389
+ else freshnessScore = 10;
230
390
  } catch {}
231
391
  }
232
- scores.kb.components.freshness = freshness;
233
-
234
- scores.kb.total = Object.values(scores.kb.components).reduce((a, b) => a + b, 0);
235
-
236
- // ===== APP SCORE (40 points) =====
237
- const srcDir = join(cwd, 'src');
392
+ scores.freshness = {
393
+ score: freshnessScore,
394
+ reason: freshnessScore > 50 ? 'Recently updated' : 'Stale - needs refresh',
395
+ improvement: freshnessScore < 80 ? 'Update KB content' : 'Fresh'
396
+ };
238
397
 
239
- // KB Citations (15 points)
240
- let codeFiles = [];
241
- let filesWithCitation = 0;
398
+ // Attribution: Check for citations in code
399
+ let attributionScore = 0;
242
400
  if (existsSync(srcDir)) {
243
401
  try {
244
- codeFiles = globSync('**/*.{ts,tsx,js,jsx,py,go,rs}', { cwd: srcDir });
245
- for (const file of codeFiles) {
402
+ const files = globSync('**/*.{ts,tsx,js,jsx,py}', { cwd: srcDir });
403
+ let filesWithCitation = 0;
404
+ for (const f of files) {
246
405
  try {
247
- const content = readFileSync(join(srcDir, file), 'utf-8');
406
+ const content = readFileSync(join(srcDir, f), 'utf-8');
248
407
  if (content.includes('KB-Generated:') || content.includes('Sources:') || content.includes('@kb-source')) {
249
408
  filesWithCitation++;
250
409
  }
251
410
  } catch {}
252
411
  }
412
+ attributionScore = files.length > 0 ? Math.round((filesWithCitation / files.length) * 100) : 100;
253
413
  } catch {}
414
+ } else {
415
+ attributionScore = 100; // No code = not applicable
254
416
  }
255
- const citationPercent = codeFiles.length > 0 ? filesWithCitation / codeFiles.length : 1;
256
- scores.app.components.kbCitations = Math.round(citationPercent * 15);
417
+ scores.attribution = {
418
+ score: attributionScore,
419
+ reason: `${attributionScore}% of code files have KB citations`,
420
+ improvement: attributionScore < 80 ? 'Add KB citations to code files' : 'Good attribution'
421
+ };
422
+
423
+ // UX Quality: Based on presence of UX-related artifacts
424
+ let uxScore = 0;
425
+ const packagePath = join(cwd, 'package.json');
426
+ const uxReviewPath = join(cwd, '.ruvector', 'ux-review');
257
427
 
258
- // Gap Resolution (10 points)
259
- const gapsPath = join(ruvectorDir, 'gaps.jsonl');
260
- let gapCount = 0;
261
- if (existsSync(gapsPath)) {
428
+ // Check for version in package.json
429
+ if (existsSync(packagePath)) {
262
430
  try {
263
- const content = readFileSync(gapsPath, 'utf-8').trim();
264
- gapCount = content ? content.split('\n').length : 0;
431
+ const pkg = JSON.parse(readFileSync(packagePath, 'utf-8'));
432
+ if (pkg.version) uxScore += 15; // Versioning exists
265
433
  } catch {}
266
434
  }
267
- scores.app.components.gapResolution = Math.max(0, 10 - gapCount);
268
435
 
269
- // Test Coverage (10 points)
270
- let hasTests = false;
271
- const testDirs = ['tests', 'test', '__tests__', 'src/__tests__'];
272
- for (const td of testDirs) {
273
- if (existsSync(join(cwd, td))) {
274
- hasTests = true;
275
- break;
276
- }
436
+ // Check for prior UX review
437
+ if (existsSync(uxReviewPath)) {
438
+ try {
439
+ const reviewFiles = readdirSync(uxReviewPath);
440
+ if (reviewFiles.length > 0) uxScore += 25; // UX review conducted
441
+ if (reviewFiles.some(f => f.includes('review-report'))) uxScore += 20; // Detailed review exists
442
+ } catch {}
277
443
  }
278
- const testFiles = existsSync(srcDir) ? globSync('**/*.{test,spec}.{ts,tsx,js,jsx}', { cwd: srcDir }) : [];
279
- scores.app.components.testCoverage = hasTests ? 5 : 0;
280
- scores.app.components.testCoverage += Math.min(5, testFiles.length);
281
444
 
282
- // Security (5 points)
283
- let secScore = 5;
284
- const gitignorePath = join(cwd, '.gitignore');
285
- if (existsSync(gitignorePath)) {
445
+ // Check for accessibility considerations
446
+ if (existsSync(srcDir)) {
286
447
  try {
287
- const content = readFileSync(gitignorePath, 'utf-8');
288
- if (!content.includes('.env')) secScore -= 2;
289
- if (!content.includes('node_modules')) secScore -= 1;
448
+ const files = globSync('**/*.{tsx,jsx,html}', { cwd: srcDir });
449
+ let a11yScore = 0;
450
+ for (const f of files) {
451
+ try {
452
+ const content = readFileSync(join(srcDir, f), 'utf-8');
453
+ if (content.includes('aria-') || content.includes('role=')) a11yScore += 5;
454
+ if (content.includes('alt=') || content.includes('loading=')) a11yScore += 3;
455
+ } catch {}
456
+ }
457
+ uxScore += Math.min(40, a11yScore);
290
458
  } catch {}
291
- } else {
292
- secScore -= 3;
293
459
  }
294
- scores.app.components.security = Math.max(0, secScore);
295
460
 
296
- scores.app.total = Object.values(scores.app.components).reduce((a, b) => a + b, 0);
461
+ scores.ux_quality = {
462
+ score: Math.min(100, uxScore),
463
+ reason: uxScore >= 60 ? 'UX review conducted, accessibility present' : 'Limited UX review artifacts',
464
+ improvement: uxScore < 80 ? 'Run kb_first_ux_review for Playwright-based UX audit' : 'Good UX coverage'
465
+ };
297
466
 
298
- // ===== PROCESS SCORE (20 points) =====
467
+ return scores;
468
+ }
299
469
 
300
- // Phase Completion (10 points)
301
- let completedPhases = [];
302
- if (existsSync(configPath)) {
303
- try {
304
- const config = JSON.parse(readFileSync(configPath, 'utf-8'));
305
- completedPhases = config.phases?.completed || [];
306
- } catch {}
307
- }
308
- const totalPhases = Object.keys(PHASES).length;
309
- scores.process.components.phaseCompletion = Math.round((completedPhases.length / totalPhases) * 10);
470
+ /**
471
+ * Score Phase Readiness (1-100 each)
472
+ */
473
+ function scorePhaseReadiness(cwd) {
474
+ const scores = {};
475
+ const configPath = join(cwd, '.ruvector', 'config.json');
476
+ let config = { phases: { completed: [], gates: {} } };
310
477
 
311
- // Gates Passed (5 points)
312
- let gatesPassed = 0;
313
478
  if (existsSync(configPath)) {
314
479
  try {
315
- const config = JSON.parse(readFileSync(configPath, 'utf-8'));
316
- gatesPassed = Object.values(config.phases?.gates || {}).filter(v => v === true).length;
480
+ config = JSON.parse(readFileSync(configPath, 'utf-8'));
317
481
  } catch {}
318
482
  }
319
- scores.process.components.gatesPassed = Math.min(5, Math.round((gatesPassed / totalPhases) * 5));
320
483
 
321
- // Documentation (5 points)
322
- let docScore = 0;
323
- if (existsSync(join(cwd, 'README.md'))) docScore += 2;
324
- if (existsSync(join(cwd, 'docs', 'api.md')) || existsSync(join(cwd, 'docs', 'API.md'))) docScore += 1;
325
- if (existsSync(join(cwd, 'docs', 'architecture.md'))) docScore += 1;
326
- if (existsSync(join(cwd, 'CHANGELOG.md'))) docScore += 1;
327
- scores.process.components.documentation = Math.min(5, docScore);
484
+ const completed = config.phases?.completed || [];
485
+ const gates = config.phases?.gates || {};
328
486
 
329
- scores.process.total = Object.values(scores.process.components).reduce((a, b) => a + b, 0);
487
+ for (const [phaseNum, phaseInfo] of Object.entries(PHASES)) {
488
+ const num = parseFloat(phaseNum);
489
+ const isCompleted = completed.includes(num);
490
+ const criteriaCount = phaseInfo.criteria.length;
330
491
 
331
- // ===== TOTAL =====
332
- scores.total = scores.kb.total + scores.app.total + scores.process.total;
492
+ // Check which criteria are met
493
+ let metCriteria = 0;
494
+ const unmetCriteria = [];
333
495
 
334
- // Grade
335
- if (scores.total >= 98) scores.grade = 'A+';
336
- else if (scores.total >= 93) scores.grade = 'A';
337
- else if (scores.total >= 90) scores.grade = 'A-';
338
- else if (scores.total >= 87) scores.grade = 'B+';
339
- else if (scores.total >= 83) scores.grade = 'B';
340
- else if (scores.total >= 80) scores.grade = 'B-';
341
- else if (scores.total >= 70) scores.grade = 'C';
342
- else if (scores.total >= 60) scores.grade = 'D';
343
- else scores.grade = 'F';
496
+ for (const criterion of phaseInfo.criteria) {
497
+ // Simplified check - in real implementation, this would be more sophisticated
498
+ if (isCompleted || checkCriterion(cwd, num, criterion)) {
499
+ metCriteria++;
500
+ } else {
501
+ unmetCriteria.push(criterion);
502
+ }
503
+ }
504
+
505
+ const score = Math.round((metCriteria / criteriaCount) * 100);
506
+
507
+ scores[phaseNum] = {
508
+ name: phaseInfo.name,
509
+ score,
510
+ metCriteria,
511
+ totalCriteria: criteriaCount,
512
+ unmet: unmetCriteria,
513
+ completed: isCompleted
514
+ };
515
+ }
344
516
 
345
517
  return scores;
346
518
  }
347
519
 
348
520
  /**
349
- * Tool Handlers
521
+ * Check if a criterion is met (simplified)
350
522
  */
351
- async function handleKbFirstAssess(cwd, args) {
352
- const scores = calculateScores(cwd);
353
-
354
- // Save baseline
355
- if (args.saveBaseline !== false) {
356
- const ruvectorDir = join(cwd, '.ruvector');
357
- if (!existsSync(ruvectorDir)) {
358
- mkdirSync(ruvectorDir, { recursive: true });
523
+ function checkCriterion(cwd, phase, criterion) {
524
+ // Check for common indicators
525
+ const criterionLower = criterion.toLowerCase();
526
+
527
+ if (criterionLower.includes('documented')) {
528
+ return existsSync(join(cwd, 'docs')) || existsSync(join(cwd, 'README.md'));
529
+ }
530
+ if (criterionLower.includes('tests')) {
531
+ return existsSync(join(cwd, 'tests')) || existsSync(join(cwd, '__tests__'));
532
+ }
533
+ if (criterionLower.includes('config')) {
534
+ return existsSync(join(cwd, '.ruvector', 'config.json'));
535
+ }
536
+ if (criterionLower.includes('hooks')) {
537
+ return existsSync(join(cwd, '.ruvector', 'hooks'));
538
+ }
539
+ if (criterionLower.includes('schema') || criterionLower.includes('tables')) {
540
+ return existsSync(join(cwd, 'templates', 'schema.sql'));
541
+ }
542
+
543
+ return false;
544
+ }
545
+
546
+ /**
547
+ * Calculate overall weighted scores: KB = weight-averaged dimension scores, phases = plain mean of phase scores, combined = 50/50 blend. Returns { kbOverall, phaseOverall, overall }, each rounded to an integer.
548
+ */
549
+ function calculateOverallScores(kbScores, phaseScores) {
550
+ // KB Overall (weighted average)
551
+ let kbTotal = 0;
552
+ let kbWeightTotal = 0;
553
+ for (const [dim, info] of Object.entries(KB_DIMENSIONS)) {
554
+ if (kbScores[dim]) { // dimensions missing from kbScores contribute neither score nor weight
555
+ kbTotal += kbScores[dim].score * info.weight;
556
+ kbWeightTotal += info.weight;
359
557
  }
360
- writeFileSync(join(ruvectorDir, 'baseline.json'), JSON.stringify(scores, null, 2));
361
558
  }
559
+ const kbOverall = kbWeightTotal > 0 ? Math.round(kbTotal / kbWeightTotal) : 0; // 0 when no dimension was scored (avoids dividing by zero)
362
560
 
363
- const result = {
364
- action: 'BASELINE_ESTABLISHED',
365
- timestamp: scores.timestamp,
366
- total: scores.total,
367
- max: scores.max,
368
- grade: scores.grade,
369
- summary: {
370
- kb: `${scores.kb.total}/${scores.kb.max}`,
371
- app: `${scores.app.total}/${scores.app.max}`,
372
- process: `${scores.process.total}/${scores.process.max}`
373
- },
374
- nextStep: 'Run kb_first_phase to begin work, then kb_first_delta to measure improvement'
561
+ // Phase Overall (average) - unweighted mean over every entry of phaseScores; 0 when there are none
562
+ const phaseValues = Object.values(phaseScores);
563
+ const phaseOverall = phaseValues.length > 0
564
+ ? Math.round(phaseValues.reduce((sum, p) => sum + p.score, 0) / phaseValues.length)
565
+ : 0;
566
+
567
+ // Combined Overall - equal weighting of the two already-rounded sub-scores
568
+ const overall = Math.round((kbOverall * 0.5) + (phaseOverall * 0.5));
569
+
570
+ return { kbOverall, phaseOverall, overall };
571
+ }
572
+
573
+ /**
574
+ * Tool Handlers
575
+ */
576
+ async function handleAssess(cwd, args) { // Scores KB dimensions and phase readiness, writes .ruvector/assessment.json, returns a display summary; `args` is not read in this body
577
+ const kbScores = scoreKBDimensions(cwd);
578
+ const phaseScores = scorePhaseReadiness(cwd);
579
+ const overall = calculateOverallScores(kbScores, phaseScores);
580
+
581
+ // Save assessment (creating .ruvector/ first if it does not exist yet)
582
+ const ruvectorDir = join(cwd, '.ruvector');
583
+ if (!existsSync(ruvectorDir)) {
584
+ mkdirSync(ruvectorDir, { recursive: true });
585
+ }
586
+
587
+ const assessment = {
588
+ timestamp: new Date().toISOString(),
589
+ kb: kbScores,
590
+ phases: phaseScores,
591
+ overall
375
592
  };
376
593
 
377
- if (args.detailed !== false) {
378
- result.breakdown = {
379
- kb: scores.kb.components,
380
- app: scores.app.components,
381
- process: scores.process.components
594
+ writeFileSync(join(ruvectorDir, 'assessment.json'), JSON.stringify(assessment, null, 2)); // this file is what handlePlan and handleVerify read back
595
+
596
+ // Format for display: only score and reason are surfaced per KB dimension
597
+ const kbSummary = {};
598
+ for (const [dim, data] of Object.entries(kbScores)) {
599
+ kbSummary[dim] = {
600
+ score: data.score,
601
+ reason: data.reason
382
602
  };
383
603
  }
384
604
 
385
- return result;
605
+ const phaseSummary = {};
606
+ for (const [num, data] of Object.entries(phaseScores)) {
607
+ phaseSummary[`Phase ${num}: ${data.name}`] = {
608
+ score: data.score,
609
+ criteria: `${data.metCriteria}/${data.totalCriteria}`,
610
+ status: data.completed ? 'COMPLETE' : (data.score >= 80 ? 'READY' : 'GAPS') // a completed phase reports COMPLETE regardless of score; otherwise 80 is the READY cut-off
611
+ };
612
+ }
613
+
614
+ return {
615
+ action: 'ASSESSMENT_COMPLETE',
616
+ timestamp: assessment.timestamp,
617
+ overallScores: {
618
+ kb: `${overall.kbOverall}/100`,
619
+ phases: `${overall.phaseOverall}/100`,
620
+ combined: `${overall.overall}/100`
621
+ },
622
+ kbQuality: kbSummary,
623
+ phaseReadiness: phaseSummary,
624
+ nextStep: 'Run kb_first_plan to generate enhancement plan based on gaps'
625
+ };
386
626
  }
387
627
 
388
- async function handleKbFirstPhase(cwd, args) {
389
- const phase = args.phase;
390
- const phaseInfo = PHASES[phase];
628
+ async function handlePlan(cwd, args) { // Builds an enhancement plan from .ruvector/assessment.json, writes .ruvector/plan.json (unconfirmed) and returns a summary; returns a NO_ASSESSMENT error object when no assessment exists
629
+ const assessmentPath = join(cwd, '.ruvector', 'assessment.json');
391
630
 
392
- if (!phaseInfo) {
631
+ if (!existsSync(assessmentPath)) {
393
632
  return {
394
- error: `Unknown phase: ${phase}`,
395
- validPhases: Object.entries(PHASES).map(([k, v]) => ({ phase: parseFloat(k), name: v.name }))
633
+ error: 'NO_ASSESSMENT',
634
+ message: 'No assessment found. Run kb_first_assess first.',
635
+ action: 'Run kb_first_assess to score KB and phase readiness'
396
636
  };
397
637
  }
398
638
 
399
- // Check for baseline
400
- const baselinePath = join(cwd, '.ruvector', 'baseline.json');
401
- let baseline = null;
402
- let baselineWarning = null;
639
+ const assessment = JSON.parse(readFileSync(assessmentPath, 'utf-8')); // NOTE(review): a corrupt assessment.json makes JSON.parse throw here; there is no try/catch in this function
640
+ const threshold = args.threshold || 80; // falsy values (including 0) fall back to 80
641
+ const focusArea = args.focusArea || 'all'; // 'all', 'kb' or 'phases' are the values tested below
642
+
643
+ const enhancements = [];
644
+ let taskId = 1; // ids are assigned in discovery order, before the priority sort further down
645
+
646
+ // Find KB gaps: every dimension scoring below the threshold becomes one task
647
+ if (focusArea === 'all' || focusArea === 'kb') {
648
+ for (const [dim, data] of Object.entries(assessment.kb)) {
649
+ if (data.score < threshold) {
650
+ const gap = threshold - data.score;
651
+ enhancements.push({
652
+ id: taskId++,
653
+ area: 'KB Quality',
654
+ dimension: KB_DIMENSIONS[dim]?.name || dim,
655
+ currentScore: data.score,
656
+ targetScore: threshold,
657
+ predictedImprovement: gap,
658
+ task: data.improvement,
659
+ priority: gap > 30 ? 'HIGH' : (gap > 15 ? 'MEDIUM' : 'LOW'),
660
+ effort: gap > 30 ? 'Large' : (gap > 15 ? 'Medium' : 'Small')
661
+ });
662
+ }
663
+ }
664
+ }
403
665
 
404
- if (existsSync(baselinePath)) {
405
- try {
406
- baseline = JSON.parse(readFileSync(baselinePath, 'utf-8'));
407
- const baselineAge = (Date.now() - new Date(baseline.timestamp).getTime()) / (1000 * 60);
408
- if (baselineAge > 60) {
409
- baselineWarning = `Baseline is ${Math.round(baselineAge)} minutes old. Consider running kb_first_assess for fresh baseline.`;
666
+ // Find Phase gaps: phases below the threshold that are not already marked completed
667
+ if (focusArea === 'all' || focusArea === 'phases') {
668
+ for (const [num, data] of Object.entries(assessment.phases)) {
669
+ if (data.score < threshold && !data.completed) {
670
+ const gap = threshold - data.score;
671
+ enhancements.push({
672
+ id: taskId++,
673
+ area: 'Phase Readiness',
674
+ dimension: `Phase ${num}: ${data.name}`,
675
+ currentScore: data.score,
676
+ targetScore: threshold,
677
+ predictedImprovement: gap,
678
+ task: `Complete: ${data.unmet.slice(0, 3).join(', ')}${data.unmet.length > 3 ? '...' : ''}`,
679
+ priority: gap > 30 ? 'HIGH' : (gap > 15 ? 'MEDIUM' : 'LOW'),
680
+ effort: gap > 30 ? 'Large' : (gap > 15 ? 'Medium' : 'Small')
681
+ });
410
682
  }
411
- } catch {}
683
+ }
412
684
  }
413
685
 
414
- // Phase-specific guidance
415
- const phaseGuidance = {
416
- 0: ['Document project scope', 'Identify domain complexity', 'Assess KB-First suitability', 'Estimate resources', 'Make go/no-go decision'],
417
- 1: ['Map domain concepts', 'Design taxonomy', 'Define relationships', 'Plan query patterns', 'Review with stakeholders'],
418
- 1.5: ['Install enforcement hooks', 'Configure hook behavior', 'Train on project patterns', 'Verify hooks work'],
419
- 2: ['Create database tables', 'Add vector columns', 'Design indexes', 'Write migration scripts'],
420
- 3: ['Collect domain content', 'Process and clean data', 'Generate embeddings', 'Import to KB', 'Validate entries'],
421
- 4: ['Analyze KB coverage', 'Calculate quality score', 'Identify gaps', 'Prioritize fixes', 'Create remediation plan'],
422
- 5: ['Build search API', 'Implement code generation', 'Add citation system', 'Enable gap logging'],
423
- 6: ['Write unit tests', 'Create integration tests', 'Test KB accuracy', 'Performance tests', 'Edge case testing'],
424
- 7: ['Optimize queries', 'Tune indexes', 'Add caching', 'Run benchmarks'],
425
- 7.5: ['Verify test coverage ≥80%', 'Run E2E suite', 'Load testing', 'Build regression suite'],
426
- 8: ['Run code scan', 'Check imports', 'Verify source returns', 'Test startup', 'Check fallbacks', 'Validate attribution', 'Test confidence', 'Review gap logs'],
427
- 9: ['Audit dependencies', 'Check OWASP Top 10', 'Test SQL injection', 'Review auth', 'Audit secrets', 'Secure APIs'],
428
- 10: ['Write README', 'Document API', 'Schema documentation', 'Architecture docs', 'Operator guide'],
429
- 11: ['Setup infrastructure', 'Configure environments', 'Build CI/CD', 'Run migrations', 'Setup monitoring', 'Go live'],
430
- 11.5: ['Setup OpenTelemetry', 'Build KB dashboard', 'Configure alerts', 'Write runbooks'],
431
- 12: ['Define gap triage', 'Setup expert review', 'Document KB updates', 'Version control', 'A/B testing']
686
+ // Sort by priority (HIGH first); tasks are later executed in this array order
687
+ const priorityOrder = { HIGH: 0, MEDIUM: 1, LOW: 2 };
688
+ enhancements.sort((a, b) => priorityOrder[a.priority] - priorityOrder[b.priority]);
689
+
690
+ // Calculate predicted totals (sum of per-task gaps, split by area)
691
+ const predictedKBImprovement = enhancements
692
+ .filter(e => e.area === 'KB Quality')
693
+ .reduce((sum, e) => sum + e.predictedImprovement, 0);
694
+
695
+ const predictedPhaseImprovement = enhancements
696
+ .filter(e => e.area === 'Phase Readiness')
697
+ .reduce((sum, e) => sum + e.predictedImprovement, 0);
698
+
699
+ const plan = {
700
+ timestamp: new Date().toISOString(),
701
+ threshold,
702
+ baselineScores: assessment.overall,
703
+ enhancements,
704
+ predictions: {
705
+ kbImprovement: `+${Math.round(predictedKBImprovement / Object.keys(KB_DIMENSIONS).length)}`, // Average across all registered KB dimensions (was hard-coded to 6, out of step with the phase line below)
706
+ phaseImprovement: `+${Math.round(predictedPhaseImprovement / Object.keys(PHASES).length)}`,
707
+ tasksCount: enhancements.length,
708
+ highPriority: enhancements.filter(e => e.priority === 'HIGH').length,
709
+ mediumPriority: enhancements.filter(e => e.priority === 'MEDIUM').length,
710
+ lowPriority: enhancements.filter(e => e.priority === 'LOW').length
711
+ },
712
+ confirmed: false,
713
+ currentTaskIndex: 0
432
714
  };
433
715
 
716
+ writeFileSync(join(cwd, '.ruvector', 'plan.json'), JSON.stringify(plan, null, 2)); // overwrites any previous plan, including a confirmed one
717
+
434
718
  return {
435
- phase,
436
- name: phaseInfo.name,
437
- gate: phaseInfo.gate,
438
- baseline: baseline ? {
439
- score: baseline.total,
440
- grade: baseline.grade,
441
- timestamp: baseline.timestamp
442
- } : null,
443
- baselineWarning,
444
- tasks: phaseGuidance[phase] || [],
445
- reminder: '⚠️ IMPORTANT: Run kb_first_delta when phase work is complete to measure improvement',
446
- gateRequirement: `Gate "${phaseInfo.gate}" must be satisfied to proceed`
719
+ action: 'PLAN_GENERATED',
720
+ summary: {
721
+ totalTasks: enhancements.length,
722
+ highPriority: plan.predictions.highPriority,
723
+ mediumPriority: plan.predictions.mediumPriority,
724
+ lowPriority: plan.predictions.lowPriority
725
+ },
726
+ predictedImprovements: {
727
+ kb: plan.predictions.kbImprovement,
728
+ phases: plan.predictions.phaseImprovement
729
+ },
730
+ enhancements: enhancements.map(e => ({
731
+ id: e.id,
732
+ priority: e.priority,
733
+ area: e.dimension,
734
+ current: e.currentScore,
735
+ target: e.targetScore,
736
+ task: e.task
737
+ })),
738
+ nextStep: 'Review the plan above. Run kb_first_confirm with confirmed=true when ready to proceed.'
447
739
  };
448
740
  }
449
741
 
450
- async function handleKbFirstDelta(cwd, args) {
451
- const baselinePath = join(cwd, '.ruvector', 'baseline.json');
742
+ async function handleConfirm(cwd, args) { // Marks .ruvector/plan.json as confirmed (only when args.confirmed is truthy) and returns the first task; returns NO_PLAN / CONFIRMATION_REQUIRED objects otherwise
743
+ const planPath = join(cwd, '.ruvector', 'plan.json');
452
744
 
453
- if (!existsSync(baselinePath)) {
745
+ if (!existsSync(planPath)) {
454
746
  return {
455
- error: 'NO_BASELINE',
456
- message: 'No baseline found. Run kb_first_assess first to establish baseline.',
457
- action: 'Run kb_first_assess with saveBaseline=true'
747
+ error: 'NO_PLAN',
748
+ message: 'No plan found. Run kb_first_plan first.',
749
+ action: 'Run kb_first_plan to generate enhancement plan'
458
750
  };
459
751
  }
460
752
 
461
- let baseline;
462
- try {
463
- baseline = JSON.parse(readFileSync(baselinePath, 'utf-8'));
464
- } catch {
465
- return { error: 'CORRUPT_BASELINE', message: 'Baseline file is corrupt. Run kb_first_assess again.' };
753
+ if (!args.confirmed) {
754
+ return {
755
+ action: 'CONFIRMATION_REQUIRED',
756
+ message: 'You must confirm with confirmed=true to proceed.',
757
+ hint: 'Review the plan from kb_first_plan, then confirm when ready.'
758
+ };
466
759
  }
467
760
 
468
- const current = calculateScores(cwd);
761
+ const plan = JSON.parse(readFileSync(planPath, 'utf-8')); // NOTE(review): unlike the removed baseline code above, a corrupt file is not caught here and JSON.parse will throw
762
+ plan.confirmed = true;
763
+ plan.confirmedAt = new Date().toISOString();
764
+ plan.currentTaskIndex = 0; // NOTE(review): confirming an already-confirmed plan rewinds execution to the first task - confirm this restart is intended
765
+
766
+ writeFileSync(planPath, JSON.stringify(plan, null, 2));
469
767
 
470
- const delta = {
471
- total: current.total - baseline.total,
472
- kb: current.kb.total - baseline.kb.total,
473
- app: current.app.total - baseline.app.total,
474
- process: current.process.total - baseline.process.total
768
+ const firstTask = plan.enhancements[0]; // undefined for an empty plan; reported as firstTask: null below
769
+
770
+ return {
771
+ action: 'PLAN_CONFIRMED',
772
+ confirmedAt: plan.confirmedAt,
773
+ totalTasks: plan.enhancements.length,
774
+ message: 'Plan locked. Ready to execute.',
775
+ firstTask: firstTask ? {
776
+ id: firstTask.id,
777
+ priority: firstTask.priority,
778
+ area: firstTask.dimension,
779
+ task: firstTask.task,
780
+ currentScore: firstTask.currentScore,
781
+ targetScore: firstTask.targetScore
782
+ } : null,
783
+ nextStep: 'Run kb_first_execute to work through the plan'
475
784
  };
785
+ }
476
786
 
477
- const verdict = delta.total >= 0 ? 'PASS' : 'FAIL';
478
- const canProceed = delta.total >= 0;
787
+ async function handleExecute(cwd, args) { // Steps through the confirmed plan one task at a time; args.taskComplete marks the current task done and advances the index persisted in plan.json
788
+ const planPath = join(cwd, '.ruvector', 'plan.json');
479
789
 
480
- const result = {
481
- verdict,
482
- canProceed,
483
- baseline: {
484
- score: baseline.total,
485
- grade: baseline.grade,
486
- timestamp: baseline.timestamp
790
+ if (!existsSync(planPath)) {
791
+ return {
792
+ error: 'NO_PLAN',
793
+ message: 'No plan found. Run kb_first_plan first.'
794
+ };
795
+ }
796
+
797
+ const plan = JSON.parse(readFileSync(planPath, 'utf-8'));
798
+
799
+ if (!plan.confirmed) {
800
+ return {
801
+ error: 'PLAN_NOT_CONFIRMED',
802
+ message: 'Plan not confirmed. Run kb_first_confirm first.',
803
+ action: 'Run kb_first_confirm with confirmed=true'
804
+ };
805
+ }
806
+
807
+ // Mark current task complete if requested (ignored once the index has run past the last task)
808
+ if (args.taskComplete && plan.currentTaskIndex < plan.enhancements.length) {
809
+ plan.enhancements[plan.currentTaskIndex].completed = true;
810
+ plan.enhancements[plan.currentTaskIndex].completedAt = new Date().toISOString();
811
+ plan.currentTaskIndex++;
812
+ writeFileSync(planPath, JSON.stringify(plan, null, 2)); // the plan file is only rewritten when a task was actually marked complete
813
+ }
814
+
815
+ // Check if all done (also true for a plan with zero tasks, so the percent division below never sees total === 0)
816
+ if (plan.currentTaskIndex >= plan.enhancements.length) {
817
+ return {
818
+ action: 'EXECUTION_COMPLETE',
819
+ message: 'All tasks completed!',
820
+ completedTasks: plan.enhancements.length,
821
+ nextStep: 'Run kb_first_verify to compare predicted vs actual improvements'
822
+ };
823
+ }
824
+
825
+ const currentTask = plan.enhancements[plan.currentTaskIndex];
826
+ const completedCount = plan.enhancements.filter(e => e.completed).length; // counts `completed` flags rather than using currentTaskIndex
827
+
828
+ return {
829
+ action: 'EXECUTING',
830
+ progress: {
831
+ completed: completedCount,
832
+ total: plan.enhancements.length,
833
+ percent: Math.round((completedCount / plan.enhancements.length) * 100)
487
834
  },
488
- current: {
489
- score: current.total,
490
- grade: current.grade,
491
- timestamp: current.timestamp
835
+ currentTask: {
836
+ id: currentTask.id,
837
+ priority: currentTask.priority,
838
+ area: currentTask.dimension,
839
+ task: currentTask.task,
840
+ currentScore: currentTask.currentScore,
841
+ targetScore: currentTask.targetScore,
842
+ predictedImprovement: `+${currentTask.predictedImprovement}`
492
843
  },
493
- delta: {
494
- total: delta.total > 0 ? `+${delta.total}` : `${delta.total}`,
495
- kb: delta.kb > 0 ? `+${delta.kb}` : `${delta.kb}`,
496
- app: delta.app > 0 ? `+${delta.app}` : `${delta.app}`,
497
- process: delta.process > 0 ? `+${delta.process}` : `${delta.process}`
844
+ guidance: getTaskGuidance(currentTask),
845
+ nextStep: 'Complete the task above, then run kb_first_execute with taskComplete=true'
846
+ };
847
+ }
848
+
849
+ function getTaskGuidance(task) { // Returns a human-readable hint for a plan task; falls back to the task's own `task` text when no canned hint applies
850
+ // Provide specific guidance based on task type (only 'KB Quality' tasks have canned hints, keyed by the dimension's display name)
851
+ if (task.area === 'KB Quality') {
852
+ switch (task.dimension) {
853
+ case 'Completeness':
854
+ return 'Add more KB entries covering missing domain topics. Each entry should be in src/kb/ directory.';
855
+ case 'Depth':
856
+ return 'Expand existing KB entries with more detail. Target 2000+ characters per entry.';
857
+ case 'Comprehensiveness':
858
+ return 'Document edge cases, exceptions, and nuances in your KB entries.';
859
+ case 'Accuracy':
860
+ return 'Add tests to validate KB content. Create a tests/ directory with validation tests.';
861
+ case 'Freshness':
862
+ return 'Update KB content with latest information. Touch .ruvector/ to update timestamps.';
863
+ case 'Attribution':
864
+ return 'Add KB-Generated: headers to code files citing their KB sources.';
865
+ default:
866
+ return task.task; // any other KB dimension name gets the plan's own task text
498
867
  }
868
+ }
869
+ return task.task; // non-KB tasks (e.g. 'Phase Readiness') always use the plan's own task text
870
+ }
871
+
872
+ async function handleVerify(cwd, args) { // Re-scores the project, compares against the stored assessment and plan, writes verification.json and, when below target, overwrites assessment.json and plan.json with the next iteration
873
+ const planPath = join(cwd, '.ruvector', 'plan.json');
874
+ const assessmentPath = join(cwd, '.ruvector', 'assessment.json');
875
+ const TARGET_SCORE = 98; // Recursive loop until we hit 98+ (independent of the threshold stored in the plan)
876
+
877
+ if (!existsSync(planPath) || !existsSync(assessmentPath)) {
878
+ return {
879
+ error: 'MISSING_DATA',
880
+ message: 'Missing plan or assessment. Run kb_first_assess and kb_first_plan first.'
881
+ };
882
+ }
883
+
884
+ const plan = JSON.parse(readFileSync(planPath, 'utf-8'));
885
+ const originalAssessment = JSON.parse(readFileSync(assessmentPath, 'utf-8'));
886
+
887
+ // Re-assess current state
888
+ const currentKB = scoreKBDimensions(cwd);
889
+ const currentPhases = scorePhaseReadiness(cwd);
890
+ const currentOverall = calculateOverallScores(currentKB, currentPhases);
891
+
892
+ // Compare predictions vs actual
893
+ const comparison = {
894
+ kb: {},
895
+ phases: {}
499
896
  };
500
897
 
501
- if (!canProceed) {
502
- result.blockReason = `Score dropped by ${Math.abs(delta.total)} points. You CANNOT proceed until score improves.`;
503
- result.action = 'Fix issues causing score regression, then run kb_first_delta again.';
504
- } else {
505
- result.action = 'Run kb_first_gate to verify phase completion and proceed.';
898
+ // Compare KB dimensions (iterates the dimensions recorded in the stored assessment)
899
+ for (const [dim, original] of Object.entries(originalAssessment.kb)) {
900
+ const current = currentKB[dim]; // NOTE(review): assumes every stored dimension is still produced by scoreKBDimensions; otherwise current.score below throws
901
+ const enhancement = plan.enhancements.find(e => e.dimension === KB_DIMENSIONS[dim]?.name); // plan tasks are matched by the dimension's display name
902
+
903
+ comparison.kb[dim] = {
904
+ before: original.score,
905
+ after: current.score,
906
+ actual: current.score - original.score,
907
+ predicted: enhancement?.predictedImprovement || 0,
908
+ hit: current.score >= TARGET_SCORE // "hit" means the score reached TARGET_SCORE, not that the predicted gain was matched
909
+ };
506
910
  }
507
911
 
508
- if (args.showBreakdown !== false) {
509
- result.componentChanges = {
510
- kb: {
511
- before: baseline.kb.components,
512
- after: current.kb.components
513
- },
514
- app: {
515
- before: baseline.app.components,
516
- after: current.app.components
517
- },
518
- process: {
519
- before: baseline.process.components,
520
- after: current.process.components
912
+ // Compare phases
913
+ for (const [num, original] of Object.entries(originalAssessment.phases)) {
914
+ const current = currentPhases[num]; // NOTE(review): same assumption as for KB dimensions - a phase missing from the fresh scores would throw below
915
+ const enhancement = plan.enhancements.find(e => e.dimension === `Phase ${num}: ${original.name}`);
916
+
917
+ comparison.phases[num] = {
918
+ name: original.name,
919
+ before: original.score,
920
+ after: current.score,
921
+ actual: current.score - original.score,
922
+ predicted: enhancement?.predictedImprovement || 0,
923
+ hit: current.score >= TARGET_SCORE
924
+ };
925
+ }
926
+
927
+ // Calculate summary
928
+ const kbHits = Object.values(comparison.kb).filter(c => c.hit).length;
929
+ const kbTotal = Object.keys(comparison.kb).length;
930
+ const phaseHits = Object.values(comparison.phases).filter(c => c.hit).length;
931
+ const phaseTotal = Object.keys(comparison.phases).length;
932
+
933
+ // Identify remaining gaps (anything below 98)
934
+ const remainingGaps = [];
935
+ for (const [dim, data] of Object.entries(comparison.kb)) {
936
+ if (data.after < TARGET_SCORE) {
937
+ remainingGaps.push({
938
+ area: 'KB Quality',
939
+ dimension: KB_DIMENSIONS[dim]?.name || dim,
940
+ currentScore: data.after,
941
+ targetScore: TARGET_SCORE,
942
+ gap: TARGET_SCORE - data.after
943
+ });
944
+ }
945
+ }
946
+ for (const [num, data] of Object.entries(comparison.phases)) {
947
+ if (data.after < TARGET_SCORE) {
948
+ remainingGaps.push({
949
+ area: 'Phase Readiness',
950
+ dimension: `Phase ${num}: ${data.name}`,
951
+ currentScore: data.after,
952
+ targetScore: TARGET_SCORE,
953
+ gap: TARGET_SCORE - data.after
954
+ });
955
+ }
956
+ }
957
+
958
+ // Track iteration count (a plan written by handlePlan carries no iterationCount, so the first verification is iteration 1)
959
+ let iterationCount = plan.iterationCount || 1;
960
+
961
+ // Save verification
962
+ const verification = {
963
+ timestamp: new Date().toISOString(),
964
+ iteration: iterationCount,
965
+ original: originalAssessment.overall,
966
+ current: currentOverall,
967
+ comparison,
968
+ remainingGaps,
969
+ targetScore: TARGET_SCORE,
970
+ targetMet: currentOverall.overall >= TARGET_SCORE
971
+ };
972
+ writeFileSync(join(cwd, '.ruvector', 'verification.json'), JSON.stringify(verification, null, 2));
973
+
974
+ const result = {
975
+ action: 'VERIFICATION_COMPLETE',
976
+ iteration: iterationCount,
977
+ targetScore: TARGET_SCORE,
978
+ summary: {
979
+ kbAt98Plus: `${kbHits}/${kbTotal}`,
980
+ phasesAt98Plus: `${phaseHits}/${phaseTotal}`,
981
+ overallImprovement: {
982
+ kb: `${originalAssessment.overall.kbOverall} → ${currentOverall.kbOverall} (${currentOverall.kbOverall - originalAssessment.overall.kbOverall >= 0 ? '+' : ''}${currentOverall.kbOverall - originalAssessment.overall.kbOverall})`,
983
+ phases: `${originalAssessment.overall.phaseOverall} → ${currentOverall.phaseOverall} (${currentOverall.phaseOverall - originalAssessment.overall.phaseOverall >= 0 ? '+' : ''}${currentOverall.phaseOverall - originalAssessment.overall.phaseOverall})`,
984
+ combined: `${originalAssessment.overall.overall} → ${currentOverall.overall} (${currentOverall.overall - originalAssessment.overall.overall >= 0 ? '+' : ''}${currentOverall.overall - originalAssessment.overall.overall})`
521
985
  }
986
+ }
987
+ };
988
+
989
+ if (args.detailed) { // per-dimension / per-phase tables are only attached on request
990
+ result.kbComparison = comparison.kb;
991
+ result.phaseComparison = comparison.phases;
992
+ }
993
+
994
+ // Check if we've hit the target (requires both the combined score and every individual score to be at TARGET_SCORE or above)
995
+ if (currentOverall.overall >= TARGET_SCORE && remainingGaps.length === 0) {
996
+ result.status = 'TARGET_ACHIEVED';
997
+ result.message = `🎯 All scores at ${TARGET_SCORE}+ after ${iterationCount} iteration(s)!`;
998
+ result.remainingGaps = 'None - all targets met!';
999
+ result.nextStep = 'Excellence achieved. Ready for production.';
1000
+ } else {
1001
+ // RECURSIVE: Auto-generate next plan (no recursive call happens here; the "recursion" is the caller re-running confirm/execute/verify)
1002
+ result.status = 'NEEDS_MORE_WORK';
1003
+ result.message = `Score ${currentOverall.overall}/100 - target is ${TARGET_SCORE}. Generating next iteration plan...`;
1004
+ result.remainingGaps = remainingGaps;
1005
+
1006
+ // Update assessment with current scores for next iteration (the previous assessment.json is overwritten)
1007
+ const newAssessment = {
1008
+ timestamp: new Date().toISOString(),
1009
+ kb: currentKB,
1010
+ phases: currentPhases,
1011
+ overall: currentOverall,
1012
+ previousIteration: iterationCount
522
1013
  };
1014
+ writeFileSync(join(cwd, '.ruvector', 'assessment.json'), JSON.stringify(newAssessment, null, 2));
1015
+
1016
+ // Auto-generate new plan for remaining gaps (NOTE(review): unlike handlePlan, these tasks are not sorted by priority)
1017
+ const newEnhancements = remainingGaps.map((gap, idx) => ({
1018
+ id: idx + 1,
1019
+ area: gap.area,
1020
+ dimension: gap.dimension,
1021
+ currentScore: gap.currentScore,
1022
+ targetScore: TARGET_SCORE,
1023
+ predictedImprovement: gap.gap,
1024
+ task: getImprovementTask(gap), // getImprovementTask is defined outside this excerpt
1025
+ priority: gap.gap > 30 ? 'HIGH' : (gap.gap > 15 ? 'MEDIUM' : 'LOW'),
1026
+ effort: gap.gap > 30 ? 'Large' : (gap.gap > 15 ? 'Medium' : 'Small')
1027
+ }));
1028
+
1029
+ const newPlan = {
1030
+ timestamp: new Date().toISOString(),
1031
+ threshold: TARGET_SCORE,
1032
+ iterationCount: iterationCount + 1,
1033
+ baselineScores: currentOverall,
1034
+ enhancements: newEnhancements,
1035
+ predictions: {
1036
+ tasksCount: newEnhancements.length,
1037
+ highPriority: newEnhancements.filter(e => e.priority === 'HIGH').length,
1038
+ mediumPriority: newEnhancements.filter(e => e.priority === 'MEDIUM').length,
1039
+ lowPriority: newEnhancements.filter(e => e.priority === 'LOW').length
1040
+ },
1041
+ confirmed: false,
1042
+ currentTaskIndex: 0
1043
+ };
1044
+ writeFileSync(join(cwd, '.ruvector', 'plan.json'), JSON.stringify(newPlan, null, 2)); // replaces the executed plan; the new one must be confirmed again before kb_first_execute will run it
1045
+
1046
+ result.newPlan = {
1047
+ iteration: iterationCount + 1,
1048
+ tasks: newEnhancements.length,
1049
+ highPriority: newPlan.predictions.highPriority
1050
+ };
1051
+ result.nextStep = `Iteration ${iterationCount + 1} plan generated. Run kb_first_confirm with confirmed=true to continue.`;
523
1052
  }
524
1053
 
525
1054
  return result;
526
1055
  }
527
1056
 
528
- async function handleKbFirstGate(cwd, args) {
529
- const phase = args.phase;
530
- const phaseInfo = PHASES[phase];
1057
+ /**
1058
+ * UX Review Criteria for critical evaluation: eight categories, each with a display name, a checklist of things to look for, and a weight (the weights below add up to 100)
1059
+ */
1060
+ const UX_CRITERIA = {
1061
+ versioning: {
1062
+ name: 'Version Display',
1063
+ checks: ['Header shows version', 'Footer shows version', 'Format: major.minor.patch'],
1064
+ weight: 15
1065
+ },
1066
+ caching: {
1067
+ name: 'Cache Management',
1068
+ checks: ['Version change detection', 'User notification on updates', 'Force refresh capability'],
1069
+ weight: 10
1070
+ },
1071
+ visual_design: {
1072
+ name: 'Visual Design Excellence',
1073
+ checks: ['Professional typography', 'Cohesive color palette', 'Proper spacing/hierarchy', 'Not generic AI aesthetic'],
1074
+ weight: 20
1075
+ },
1076
+ emotional_appeal: {
1077
+ name: 'Emotional Appeal',
1078
+ checks: ['Creates confidence', 'Guides without confusion', 'Celebrates success states', 'Softens error states'],
1079
+ weight: 15
1080
+ },
1081
+ loading_states: {
1082
+ name: 'Loading States',
1083
+ checks: ['Skeleton loaders present', 'Progress indicators', 'No jarring transitions', 'Graceful degradation'],
1084
+ weight: 10
1085
+ },
1086
+ error_handling: {
1087
+ name: 'Error Handling UX',
1088
+ checks: ['Clear error messages', 'Actionable next steps', 'Recovery paths provided', 'No technical jargon'],
1089
+ weight: 10
1090
+ },
1091
+ user_flow: {
1092
+ name: 'User Flow',
1093
+ checks: ['Intuitive navigation', 'Clear call-to-actions', 'Logical progression', 'Minimal friction'],
1094
+ weight: 10
1095
+ },
1096
+ accessibility: {
1097
+ name: 'Accessibility',
1098
+ checks: ['Keyboard navigation', 'Screen reader compatible', 'Color contrast', 'Focus indicators'],
1099
+ weight: 10
1100
+ }
1101
+ };
1102
+
1103
+ /**
1104
+ * Detect available frontend-design skills by looking for skill files in <home>/.claude/skills. Returns { preferred, available, recommendation }; recommendation is only populated when nothing was found.
1105
+ */
1106
+ function detectFrontendDesignSkills() {
1107
+ const homeDir = process.env.HOME || process.env.USERPROFILE || ''; // with neither variable set, the lookup path becomes relative ('.claude/skills')
1108
+ const skillsDir = join(homeDir, '.claude', 'skills');
1109
+ const skills = {
1110
+ preferred: null,
1111
+ available: [],
1112
+ recommendation: null
1113
+ };
531
1114
 
532
- if (!phaseInfo) {
533
- return { error: `Unknown phase: ${phase}` };
1115
+ // Check for frontend-design-Stu (preferred)
1116
+ if (existsSync(join(skillsDir, 'frontend-design-Stu.md'))) {
1117
+ skills.preferred = 'frontend-design-Stu';
1118
+ skills.available.push('frontend-design-Stu');
534
1119
  }
535
1120
 
536
- // Check baseline exists
537
- const baselinePath = join(cwd, '.ruvector', 'baseline.json');
538
- if (!existsSync(baselinePath)) {
539
- return {
540
- canProceed: false,
541
- blockReason: 'GATE_BLOCKED: No baseline established. Run kb_first_assess first.',
542
- phase,
543
- phaseName: phaseInfo.name
544
- };
1121
+ // Check for frontend-design (becomes preferred only when the -Stu variant is absent)
1122
+ if (existsSync(join(skillsDir, 'frontend-design.md'))) {
1123
+ if (!skills.preferred) {
1124
+ skills.preferred = 'frontend-design';
1125
+ }
1126
+ skills.available.push('frontend-design');
545
1127
  }
546
1128
 
547
- // Check delta
548
- let baseline;
549
- try {
550
- baseline = JSON.parse(readFileSync(baselinePath, 'utf-8'));
551
- } catch {
552
- return {
553
- canProceed: false,
554
- blockReason: 'GATE_BLOCKED: Corrupt baseline. Run kb_first_assess again.',
555
- phase,
556
- phaseName: phaseInfo.name
1129
+ // Generate recommendation if no skills found
1130
+ if (skills.available.length === 0) {
1131
+ skills.recommendation = {
1132
+ message: 'No frontend-design skill installed. For UX excellence, install the Anthropic frontend-design skill.',
1133
+ installation: 'Visit: https://github.com/anthropics/claude-code-plugins and install frontend-design plugin',
1134
+ benefit: 'Creates distinctive, production-grade interfaces that avoid generic AI aesthetics'
557
1135
  };
558
1136
  }
559
1137
 
560
- const current = calculateScores(cwd);
561
- const delta = current.total - baseline.total;
1138
+ return skills;
1139
+ }
562
1140
 
563
- if (delta < 0) {
564
- return {
565
- canProceed: false,
566
- blockReason: `GATE_BLOCKED: Score regression detected (${delta} points). Fix issues before proceeding.`,
567
- phase,
568
- phaseName: phaseInfo.name,
569
- baseline: baseline.total,
570
- current: current.total,
571
- delta
572
- };
573
- }
1141
+ /**
1142
+ * Handle UX Review with Playwright
1143
+ */
1144
+ async function handleUXReview(cwd, args) {
1145
+ const { appUrl, flows = ['homepage', 'main_flow'], screenshotDir = '.ruvector/ux-review', criticalReview = true } = args;
574
1146
 
575
- // Check gate-specific conditions
576
- const configPath = join(cwd, '.ruvector', 'config.json');
577
- let config = { phases: { current: 0, completed: [], gates: {} } };
578
- if (existsSync(configPath)) {
579
- try {
580
- config = JSON.parse(readFileSync(configPath, 'utf-8'));
581
- } catch {}
582
- }
1147
+ // Check for frontend-design skills
1148
+ const designSkills = detectFrontendDesignSkills();
583
1149
 
584
- // Phase-specific gate checks
585
- let gateConditionMet = false;
586
- let gateMessage = '';
587
-
588
- switch (phaseInfo.gate) {
589
- case 'kb_score_50':
590
- gateConditionMet = current.kb.total >= 20; // 50% of 40
591
- gateMessage = gateConditionMet ? 'KB score ≥50%' : `KB score ${current.kb.total}/40 < 50%`;
592
- break;
593
- case 'kb_score_80':
594
- gateConditionMet = current.kb.total >= 32; // 80% of 40
595
- gateMessage = gateConditionMet ? 'KB score ≥80%' : `KB score ${current.kb.total}/40 < 80%`;
596
- break;
597
- case 'coverage_80':
598
- gateConditionMet = current.app.components.testCoverage >= 8; // 80% of 10
599
- gateMessage = gateConditionMet ? 'Test coverage ≥80%' : `Test coverage ${current.app.components.testCoverage}/10 < 80%`;
600
- break;
601
- default:
602
- // Default: pass if delta is non-negative
603
- gateConditionMet = true;
604
- gateMessage = 'Gate condition satisfied (positive delta)';
1150
+ // Check if Playwright is installed
1151
+ let playwrightAvailable = false;
1152
+ try {
1153
+ await import('playwright');
1154
+ playwrightAvailable = true;
1155
+ } catch {
1156
+ // Playwright not installed
605
1157
  }
606
1158
 
607
- if (!gateConditionMet) {
1159
+ if (!playwrightAvailable) {
608
1160
  return {
609
- canProceed: false,
610
- blockReason: `GATE_BLOCKED: ${gateMessage}`,
611
- phase,
612
- phaseName: phaseInfo.name,
613
- gate: phaseInfo.gate
1161
+ action: 'PLAYWRIGHT_REQUIRED',
1162
+ status: 'INSTALLATION_NEEDED',
1163
+ message: 'Playwright is required for UX review but not installed.',
1164
+ installation: {
1165
+ command: 'npm install playwright && npx playwright install chromium',
1166
+ description: 'Installs Playwright and Chromium browser for screenshot capture',
1167
+ automated: 'Run this command, then re-run kb_first_ux_review'
1168
+ },
1169
+ designSkills: designSkills,
1170
+ alternative: {
1171
+ message: 'Alternatively, you can manually capture screenshots and place them in:',
1172
+ directory: join(cwd, screenshotDir),
1173
+ format: 'Name format: 01-homepage.png, 02-login.png, etc.',
1174
+ thenRun: 'Re-run kb_first_ux_review with criticalReview=true to analyze'
1175
+ }
614
1176
  };
615
1177
  }
616
1178
 
617
- // Mark phase as completed
618
- if (!config.phases.completed.includes(parseFloat(phase))) {
619
- config.phases.completed.push(parseFloat(phase));
1179
+ // Create screenshot directory
1180
+ const screenshotPath = join(cwd, screenshotDir);
1181
+ if (!existsSync(screenshotPath)) {
1182
+ mkdirSync(screenshotPath, { recursive: true });
620
1183
  }
621
- config.phases.gates[phaseInfo.gate] = true;
622
1184
 
623
- // Determine next phase
624
- const phaseOrder = [0, 1, 1.5, 2, 3, 4, 5, 6, 7, 7.5, 8, 9, 10, 11, 11.5, 12];
625
- const currentIdx = phaseOrder.indexOf(parseFloat(phase));
626
- const nextPhase = currentIdx < phaseOrder.length - 1 ? phaseOrder[currentIdx + 1] : null;
1185
+ // Capture screenshots using Playwright
1186
+ const screenshots = [];
1187
+ try {
1188
+ const { chromium } = await import('playwright');
1189
+ const browser = await chromium.launch();
1190
+ const context = await browser.newContext({
1191
+ viewport: { width: 1920, height: 1080 }
1192
+ });
1193
+ const page = await context.newPage();
1194
+
1195
+ // Navigate to app
1196
+ await page.goto(appUrl, { waitUntil: 'networkidle', timeout: 30000 });
1197
+
1198
+ // Capture initial screenshot
1199
+ const timestamp = Date.now();
1200
+ const homepagePath = join(screenshotPath, `01-homepage-${timestamp}.png`);
1201
+ await page.screenshot({ path: homepagePath, fullPage: true });
1202
+ screenshots.push({
1203
+ id: 1,
1204
+ name: 'Homepage',
1205
+ path: homepagePath,
1206
+ url: appUrl,
1207
+ viewport: 'desktop'
1208
+ });
1209
+
1210
+ // Check for version display
1211
+ const versionCheck = await page.evaluate(() => {
1212
+ const body = document.body.innerText;
1213
+ const versionPattern = /v?\d+\.\d+\.\d+/;
1214
+ const hasVersion = versionPattern.test(body);
1215
+ const header = document.querySelector('header')?.innerText || '';
1216
+ const footer = document.querySelector('footer')?.innerText || '';
1217
+ return {
1218
+ hasVersion,
1219
+ inHeader: versionPattern.test(header),
1220
+ inFooter: versionPattern.test(footer),
1221
+ foundVersion: body.match(versionPattern)?.[0] || null
1222
+ };
1223
+ });
1224
+
1225
+ // Mobile viewport screenshot
1226
+ await page.setViewportSize({ width: 375, height: 812 });
1227
+ await page.reload({ waitUntil: 'networkidle' });
1228
+ const mobilePath = join(screenshotPath, `02-homepage-mobile-${timestamp}.png`);
1229
+ await page.screenshot({ path: mobilePath, fullPage: true });
1230
+ screenshots.push({
1231
+ id: 2,
1232
+ name: 'Homepage Mobile',
1233
+ path: mobilePath,
1234
+ url: appUrl,
1235
+ viewport: 'mobile'
1236
+ });
1237
+
1238
+ await browser.close();
1239
+
1240
+ // Generate critical review for each screenshot
1241
+ const reviews = [];
1242
+ if (criticalReview) {
1243
+ for (const screenshot of screenshots) {
1244
+ reviews.push({
1245
+ screenshot: screenshot.name,
1246
+ path: screenshot.path,
1247
+ score: 0, // To be filled by Claude's analysis
1248
+ evaluation: {
1249
+ howGood: 'REQUIRES_VISUAL_ANALYSIS - Open screenshot and evaluate',
1250
+ couldBeBetter: [],
1251
+ fallingDown: [],
1252
+ excellentWouldLookLike: '',
1253
+ recommendations: []
1254
+ },
1255
+ criteria: Object.entries(UX_CRITERIA).map(([key, crit]) => ({
1256
+ name: crit.name,
1257
+ checks: crit.checks,
1258
+ score: 0,
1259
+ notes: 'Awaiting visual review'
1260
+ }))
1261
+ });
1262
+ }
1263
+ }
627
1264
 
628
- if (nextPhase !== null) {
629
- config.phases.current = nextPhase;
630
- }
1265
+ // Save review report
1266
+ const reviewReport = {
1267
+ timestamp: new Date().toISOString(),
1268
+ appUrl,
1269
+ versionCheck,
1270
+ screenshots,
1271
+ reviews,
1272
+ summary: {
1273
+ screenshotsCaptured: screenshots.length,
1274
+ versionDisplayed: versionCheck.hasVersion,
1275
+ versionInHeader: versionCheck.inHeader,
1276
+ versionInFooter: versionCheck.inFooter,
1277
+ requiresManualReview: true
1278
+ },
1279
+ criticalQuestions: [
1280
+ 'How good is this? (Score each screen 1-100)',
1281
+ 'How could we make it better?',
1282
+ 'Where is it falling down?',
1283
+ 'What would EXCELLENT look like?',
1284
+ 'What are the specific recommendations?'
1285
+ ]
1286
+ };
631
1287
 
632
- // Save config
633
- writeFileSync(configPath, JSON.stringify(config, null, 2));
1288
+ writeFileSync(join(screenshotPath, 'review-report.json'), JSON.stringify(reviewReport, null, 2));
634
1289
 
635
- // Save current scores as new baseline for next phase
636
- writeFileSync(baselinePath, JSON.stringify(current, null, 2));
1290
+ return {
1291
+ action: 'UX_REVIEW_CAPTURED',
1292
+ status: 'SCREENSHOTS_READY',
1293
+ appUrl,
1294
+ versionCheck: {
1295
+ found: versionCheck.hasVersion,
1296
+ version: versionCheck.foundVersion,
1297
+ inHeader: versionCheck.inHeader,
1298
+ inFooter: versionCheck.inFooter,
1299
+ recommendation: !versionCheck.inHeader && !versionCheck.inFooter
1300
+ ? '⚠️ Version not displayed in header/footer - ADD version display immediately'
1301
+ : '✓ Version displayed'
1302
+ },
1303
+ screenshots: screenshots.map(s => ({
1304
+ name: s.name,
1305
+ path: s.path,
1306
+ viewport: s.viewport
1307
+ })),
1308
+ nextSteps: {
1309
+ step1: 'Open each screenshot file and visually inspect',
1310
+ step2: 'Score each screen 1-100 for overall UX quality',
1311
+ step3: 'Document: What works? What fails? What would excellent look like?',
1312
+ step4: 'Generate specific recommendations with priority (HIGH/MEDIUM/LOW)',
1313
+ step5: 'Update review-report.json with scores and findings',
1314
+ step6: 'Re-run kb_first_assess to update UX Quality scores'
1315
+ },
1316
+ criticalQuestions: reviewReport.criticalQuestions,
1317
+ criteria: Object.entries(UX_CRITERIA).map(([key, crit]) => ({
1318
+ name: crit.name,
1319
+ checks: crit.checks,
1320
+ weight: `${crit.weight}%`
1321
+ })),
1322
+ reportPath: join(screenshotPath, 'review-report.json'),
1323
+ designSkills: {
1324
+ available: designSkills.available,
1325
+ useSkill: designSkills.preferred || null,
1326
+ recommendation: designSkills.recommendation,
1327
+ implementationGuidance: designSkills.preferred
1328
+ ? `Use skill "${designSkills.preferred}" when implementing UX improvements. It creates distinctive, production-grade interfaces.`
1329
+ : 'Install frontend-design skill for UX improvements: https://github.com/anthropics/claude-code-plugins'
1330
+ }
1331
+ };
1332
+ } catch (error) {
1333
+ return {
1334
+ action: 'UX_REVIEW_ERROR',
1335
+ status: 'CAPTURE_FAILED',
1336
+ error: error.message,
1337
+ troubleshooting: {
1338
+ step1: 'Ensure the app is running at the specified URL',
1339
+ step2: 'Check if Playwright browsers are installed: npx playwright install chromium',
1340
+ step3: 'Try with a simpler URL first',
1341
+ step4: 'Manually capture screenshots and place in: ' + screenshotPath
1342
+ }
1343
+ };
1344
+ }
1345
+ }
637
1346
 
638
- return {
639
- canProceed: true,
640
- phase,
641
- phaseName: phaseInfo.name,
642
- gateStatus: 'PASSED',
643
- gateMessage,
644
- scoreImprovement: delta > 0 ? `+${delta}` : '0',
645
- nextPhase,
646
- nextPhaseName: nextPhase !== null ? PHASES[nextPhase]?.name : 'PROJECT COMPLETE',
647
- action: nextPhase !== null
648
- ? `Run kb_first_assess to establish baseline for Phase ${nextPhase}: ${PHASES[nextPhase]?.name}`
649
- : 'All phases complete! Project ready for production.'
650
- };
1347
+ /**
1348
+ * Get improvement task based on gap
1349
+ */
1350
+ function getImprovementTask(gap) {
1351
+ if (gap.area === 'KB Quality') {
1352
+ switch (gap.dimension) {
1353
+ case 'Completeness':
1354
+ return `Add more KB entries to reach ${gap.targetScore}% coverage`;
1355
+ case 'Depth':
1356
+ return `Expand KB entries with more detail (target: 2500+ chars each)`;
1357
+ case 'Comprehensiveness':
1358
+ return `Document additional edge cases and exceptions`;
1359
+ case 'Accuracy':
1360
+ return `Add validation tests and verification`;
1361
+ case 'Freshness':
1362
+ return `Update KB content with latest information`;
1363
+ case 'Attribution':
1364
+ return `Add KB-Generated headers to remaining code files`;
1365
+ default:
1366
+ return `Improve ${gap.dimension} to ${gap.targetScore}`;
1367
+ }
1368
+ }
1369
+ return `Complete remaining criteria for ${gap.dimension}`;
651
1370
  }
652
1371
 
653
1372
  /**
654
1373
  * Handle MCP tool calls
655
1374
  */
656
1375
  async function handleToolCall(toolName, args) {
657
- const cwd = process.cwd();
1376
+ const cwd = args.projectPath || process.cwd();
658
1377
 
659
1378
  switch (toolName) {
660
1379
  case 'kb_first_assess':
661
- return await handleKbFirstAssess(cwd, args);
662
- case 'kb_first_phase':
663
- return await handleKbFirstPhase(cwd, args);
664
- case 'kb_first_delta':
665
- return await handleKbFirstDelta(cwd, args);
666
- case 'kb_first_gate':
667
- return await handleKbFirstGate(cwd, args);
1380
+ return await handleAssess(cwd, args);
1381
+ case 'kb_first_plan':
1382
+ return await handlePlan(cwd, args);
1383
+ case 'kb_first_confirm':
1384
+ return await handleConfirm(cwd, args);
1385
+ case 'kb_first_execute':
1386
+ return await handleExecute(cwd, args);
1387
+ case 'kb_first_verify':
1388
+ return await handleVerify(cwd, args);
1389
+ case 'kb_first_ux_review':
1390
+ return await handleUXReview(cwd, args);
668
1391
  default:
669
1392
  return { error: `Unknown tool: ${toolName}` };
670
1393
  }
@@ -711,12 +1434,13 @@ async function handleMCPMessage(message) {
711
1434
  }
712
1435
 
713
1436
  /**
714
- * Start MCP Server (stdio mode)
1437
+ * Start MCP Server
715
1438
  */
716
1439
  export async function startMCPServer(options = {}) {
717
1440
  console.error(`RuvNet KB-First MCP Server v${SERVER_VERSION}`);
718
- console.error('Architecture: Score-Driven | Tools: 4 | Phases: 15');
719
- console.error('Philosophy: Scoring IS enforcement. No shortcuts.');
1441
+ console.error('Architecture: Granular Score-Driven | Tools: 6 | Dimensions: 7 KB + 13 Phases');
1442
+ console.error('Workflow: Assess Plan Confirm → Execute → Verify → UX Review');
1443
+ console.error('UX Review: Playwright-based visual quality audit from end-user perspective');
720
1444
 
721
1445
  let buffer = '';
722
1446
  process.stdin.setEncoding('utf-8');