edsger 0.45.1 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.claude/settings.local.json +3 -23
  2. package/dist/api/__tests__/app-store.test.d.ts +7 -0
  3. package/dist/api/__tests__/app-store.test.js +60 -0
  4. package/dist/api/__tests__/intelligence.test.d.ts +11 -0
  5. package/dist/api/__tests__/intelligence.test.js +315 -0
  6. package/dist/api/features/__tests__/feature-utils.test.d.ts +4 -0
  7. package/dist/api/features/__tests__/feature-utils.test.js +370 -0
  8. package/dist/api/features/__tests__/status-updater.test.d.ts +4 -0
  9. package/dist/api/features/__tests__/status-updater.test.js +88 -0
  10. package/dist/commands/build/__tests__/build.test.d.ts +5 -0
  11. package/dist/commands/build/__tests__/build.test.js +206 -0
  12. package/dist/commands/build/__tests__/detect-project.test.d.ts +6 -0
  13. package/dist/commands/build/__tests__/detect-project.test.js +160 -0
  14. package/dist/commands/build/__tests__/run-build.test.d.ts +6 -0
  15. package/dist/commands/build/__tests__/run-build.test.js +433 -0
  16. package/dist/commands/intelligence/__tests__/command.test.d.ts +4 -0
  17. package/dist/commands/intelligence/__tests__/command.test.js +48 -0
  18. package/dist/commands/run-sheet/index.js +6 -0
  19. package/dist/commands/workflow/core/__tests__/feature-filter.test.d.ts +5 -0
  20. package/dist/commands/workflow/core/__tests__/feature-filter.test.js +316 -0
  21. package/dist/commands/workflow/core/__tests__/pipeline-evaluator.test.d.ts +4 -0
  22. package/dist/commands/workflow/core/__tests__/pipeline-evaluator.test.js +397 -0
  23. package/dist/commands/workflow/core/__tests__/state-manager.test.d.ts +4 -0
  24. package/dist/commands/workflow/core/__tests__/state-manager.test.js +384 -0
  25. package/dist/commands/workflow/executors/phase-executor.js +3 -1
  26. package/dist/commands/workflow/phase-orchestrator.js +1 -2
  27. package/dist/config/__tests__/config.test.d.ts +4 -0
  28. package/dist/config/__tests__/config.test.js +286 -0
  29. package/dist/config/__tests__/feature-status.test.d.ts +4 -0
  30. package/dist/config/__tests__/feature-status.test.js +111 -0
  31. package/dist/errors/__tests__/index.test.d.ts +4 -0
  32. package/dist/errors/__tests__/index.test.js +349 -0
  33. package/dist/index.js +0 -0
  34. package/dist/phases/app-store-generation/__tests__/agent.test.d.ts +5 -0
  35. package/dist/phases/app-store-generation/__tests__/agent.test.js +142 -0
  36. package/dist/phases/app-store-generation/__tests__/context.test.d.ts +4 -0
  37. package/dist/phases/app-store-generation/__tests__/context.test.js +284 -0
  38. package/dist/phases/app-store-generation/__tests__/prompts.test.d.ts +4 -0
  39. package/dist/phases/app-store-generation/__tests__/prompts.test.js +122 -0
  40. package/dist/phases/app-store-generation/__tests__/screenshot-composer.test.d.ts +5 -0
  41. package/dist/phases/app-store-generation/__tests__/screenshot-composer.test.js +826 -0
  42. package/dist/phases/app-store-generation/index.js +1 -2
  43. package/dist/phases/branch-planning/index.js +1 -2
  44. package/dist/phases/bug-fixing/analyzer.js +1 -2
  45. package/dist/phases/code-implementation/index.js +1 -2
  46. package/dist/phases/code-refine/index.js +1 -2
  47. package/dist/phases/code-review/__tests__/diff-utils.test.d.ts +1 -0
  48. package/dist/phases/code-review/__tests__/diff-utils.test.js +101 -0
  49. package/dist/phases/code-review/index.js +1 -2
  50. package/dist/phases/code-testing/analyzer.js +1 -2
  51. package/dist/phases/feature-analysis/index.js +1 -2
  52. package/dist/phases/functional-testing/analyzer.js +1 -2
  53. package/dist/phases/growth-analysis/index.js +1 -2
  54. package/dist/phases/intelligence-analysis/__tests__/context.test.d.ts +4 -0
  55. package/dist/phases/intelligence-analysis/__tests__/context.test.js +192 -0
  56. package/dist/phases/intelligence-analysis/__tests__/matching.test.d.ts +13 -0
  57. package/dist/phases/intelligence-analysis/__tests__/matching.test.js +154 -0
  58. package/dist/phases/intelligence-analysis/__tests__/orchestration.test.d.ts +5 -0
  59. package/dist/phases/intelligence-analysis/__tests__/orchestration.test.js +378 -0
  60. package/dist/phases/intelligence-analysis/__tests__/prompts.test.d.ts +4 -0
  61. package/dist/phases/intelligence-analysis/__tests__/prompts.test.js +33 -0
  62. package/dist/phases/pr-execution/__tests__/file-assigner.test.d.ts +1 -0
  63. package/dist/phases/pr-execution/__tests__/file-assigner.test.js +303 -0
  64. package/dist/phases/pr-execution/index.js +1 -0
  65. package/dist/phases/pr-resolve/__tests__/checklist-learner.test.d.ts +1 -0
  66. package/dist/phases/pr-resolve/__tests__/checklist-learner.test.js +157 -0
  67. package/dist/phases/pr-resolve/__tests__/prompts.test.d.ts +1 -0
  68. package/dist/phases/pr-resolve/__tests__/prompts.test.js +116 -0
  69. package/dist/phases/pr-resolve/__tests__/resolve-mapping.test.d.ts +1 -0
  70. package/dist/phases/pr-resolve/__tests__/resolve-mapping.test.js +138 -0
  71. package/dist/phases/pr-resolve/__tests__/types.test.d.ts +1 -0
  72. package/dist/phases/pr-resolve/__tests__/types.test.js +43 -0
  73. package/dist/phases/pr-resolve/__tests__/workspace.test.d.ts +1 -0
  74. package/dist/phases/pr-resolve/__tests__/workspace.test.js +111 -0
  75. package/dist/phases/pr-review/__tests__/prompts.test.d.ts +1 -0
  76. package/dist/phases/pr-review/__tests__/prompts.test.js +49 -0
  77. package/dist/phases/pr-review/__tests__/review-comments.test.d.ts +1 -0
  78. package/dist/phases/pr-review/__tests__/review-comments.test.js +110 -0
  79. package/dist/phases/pr-shared/__tests__/agent-utils.test.d.ts +1 -0
  80. package/dist/phases/pr-shared/__tests__/agent-utils.test.js +91 -0
  81. package/dist/phases/pr-shared/__tests__/context.test.d.ts +1 -0
  82. package/dist/phases/pr-shared/__tests__/context.test.js +94 -0
  83. package/dist/phases/pr-splitting/__tests__/import-dep-validator.test.d.ts +1 -0
  84. package/dist/phases/pr-splitting/__tests__/import-dep-validator.test.js +331 -0
  85. package/dist/phases/pr-splitting/index.js +1 -2
  86. package/dist/phases/release-sync/github.d.ts +12 -0
  87. package/dist/phases/release-sync/github.js +39 -0
  88. package/dist/phases/release-sync/snapshot.js +0 -1
  89. package/dist/phases/run-sheet/index.d.ts +15 -0
  90. package/dist/phases/run-sheet/index.js +161 -29
  91. package/dist/phases/run-sheet/render.d.ts +23 -5
  92. package/dist/phases/run-sheet/render.js +195 -31
  93. package/dist/phases/smoke-test/__tests__/agent.test.d.ts +4 -0
  94. package/dist/phases/smoke-test/__tests__/agent.test.js +84 -0
  95. package/dist/phases/smoke-test/__tests__/github.test.d.ts +9 -0
  96. package/dist/phases/smoke-test/__tests__/github.test.js +120 -0
  97. package/dist/phases/smoke-test/__tests__/snapshot.test.d.ts +8 -0
  98. package/dist/phases/smoke-test/__tests__/snapshot.test.js +93 -0
  99. package/dist/phases/smoke-test/agent.js +2 -4
  100. package/dist/phases/smoke-test/github.d.ts +54 -0
  101. package/dist/phases/smoke-test/github.js +101 -0
  102. package/dist/phases/smoke-test/index.js +11 -6
  103. package/dist/phases/smoke-test/snapshot.d.ts +27 -0
  104. package/dist/phases/smoke-test/snapshot.js +157 -0
  105. package/dist/phases/technical-design/index.js +1 -2
  106. package/dist/phases/test-cases-analysis/index.js +1 -2
  107. package/dist/phases/user-stories-analysis/index.js +1 -2
  108. package/dist/services/coaching/__tests__/coaching-agent.test.d.ts +1 -0
  109. package/dist/services/coaching/__tests__/coaching-agent.test.js +74 -0
  110. package/dist/services/coaching/__tests__/coaching-loop.test.d.ts +1 -0
  111. package/dist/services/coaching/__tests__/coaching-loop.test.js +59 -0
  112. package/dist/services/coaching/__tests__/self-rating.test.d.ts +1 -0
  113. package/dist/services/coaching/__tests__/self-rating.test.js +188 -0
  114. package/dist/services/lifecycle-agent/__tests__/phase-criteria.test.d.ts +4 -0
  115. package/dist/services/lifecycle-agent/__tests__/phase-criteria.test.js +133 -0
  116. package/dist/services/lifecycle-agent/__tests__/transition-rules.test.d.ts +4 -0
  117. package/dist/services/lifecycle-agent/__tests__/transition-rules.test.js +336 -0
  118. package/dist/services/lifecycle-agent/index.d.ts +24 -0
  119. package/dist/services/lifecycle-agent/index.js +25 -0
  120. package/dist/services/lifecycle-agent/phase-criteria.d.ts +57 -0
  121. package/dist/services/lifecycle-agent/phase-criteria.js +335 -0
  122. package/dist/services/lifecycle-agent/transition-rules.d.ts +60 -0
  123. package/dist/services/lifecycle-agent/transition-rules.js +184 -0
  124. package/dist/services/lifecycle-agent/types.d.ts +190 -0
  125. package/dist/services/lifecycle-agent/types.js +12 -0
  126. package/dist/services/phase-hooks/__tests__/bindings-fetcher.test.d.ts +1 -0
  127. package/dist/services/phase-hooks/__tests__/bindings-fetcher.test.js +122 -0
  128. package/dist/services/phase-hooks/__tests__/hook-executor.test.d.ts +1 -0
  129. package/dist/services/phase-hooks/__tests__/hook-executor.test.js +321 -0
  130. package/dist/services/phase-hooks/__tests__/hook-runner.test.d.ts +1 -0
  131. package/dist/services/phase-hooks/__tests__/hook-runner.test.js +261 -0
  132. package/dist/services/phase-hooks/__tests__/plugin-loader.test.d.ts +1 -0
  133. package/dist/services/phase-hooks/__tests__/plugin-loader.test.js +158 -0
  134. package/dist/services/video/__tests__/video-pipeline.test.d.ts +6 -0
  135. package/dist/services/video/__tests__/video-pipeline.test.js +249 -0
  136. package/dist/workspace/__tests__/workspace-manager.test.d.ts +7 -0
  137. package/dist/workspace/__tests__/workspace-manager.test.js +52 -0
  138. package/dist/workspace/workspace-manager.js +17 -4
  139. package/package.json +1 -1
  140. package/.env.local +0 -12
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Snapshot detection: after the latest GitHub release already has a
3
+ * smoke-test plan, inspect the cloned repo for a yet-to-be-released
4
+ * "snapshot" version (next package.json version, an [Unreleased] entry
5
+ * in CHANGELOG, an unreleased git tag, etc.) so we can prepare the
6
+ * next smoke test before anyone cuts the tag on GitHub.
7
+ */
8
+ import { query } from '@anthropic-ai/claude-agent-sdk';
9
+ import { DEFAULT_MODEL } from '../../constants.js';
10
+ import { logDebug, logInfo } from '../../utils/logger.js';
11
+ import { findBalancedJsonObject } from './agent.js';
12
/**
 * Build the instruction text sent to the agent for snapshot detection.
 *
 * The returned string interpolates `latestReleaseTag` in three places (the
 * "most recent shipped release" line, the git-tag step, and the decision
 * rule) and asks for a reply consisting only of a JSON object with the keys
 * `snapshot_tag`, `source` and `reasoning`.
 *
 * NOTE(review): this view is a rendered diff, so the original leading
 * whitespace inside the template literal is not visible here — confirm the
 * exact prompt layout against the published file before editing the text.
 *
 * @param latestReleaseTag Tag of the latest GitHub release; inserted verbatim
 *   (presumably a string such as "v1.2.3" — confirm against the caller).
 * @returns The complete prompt string.
 */
+ export function buildSnapshotDetectionPrompt(latestReleaseTag) {
13
+ return `You are inspecting a code repository to decide whether an unreleased "snapshot" version is being prepared.
14
+
15
+ The most recent shipped release on GitHub is tagged: **${latestReleaseTag}**
16
+
17
+ Your job:
18
+
19
+ 1. Read the repository's primary version source. In priority order:
20
+ - package.json "version" field (JS/TS projects)
21
+ - Cargo.toml [package] version (Rust)
22
+ - pyproject.toml [project] / [tool.poetry] version (Python)
23
+ - pom.xml <version> (Java / Maven)
24
+ - VERSION file (plain text)
25
+ 2. Read CHANGELOG.md / HISTORY.md / RELEASES.md for an [Unreleased] or similarly labelled section.
26
+ 3. Run \`git tag --list\` and \`git tag --sort=-creatordate | head -5\` to look for tags newer than ${latestReleaseTag} that have not yet been cut as GitHub releases.
27
+
28
+ Then decide:
29
+ - If the primary version source is strictly greater than ${latestReleaseTag} (accounting for "v" prefixes and semver prerelease suffixes like \`-SNAPSHOT\`, \`-rc.1\`, \`-next.0\`), OR there is a newer unreleased git tag, report that string as \`snapshot_tag\`.
30
+ - Otherwise report \`snapshot_tag: null\`.
31
+ - Do NOT invent a version. Use the exact string from the source file. Preserve the repo's existing tag convention (e.g. if releases are \`v1.2.3\` but package.json says \`1.2.3\`, return \`v1.2.3\`).
32
+
33
+ Respond with ONLY a JSON object — no prose, no markdown fences:
34
+
35
+ {
36
+ "snapshot_tag": "v2.0.0" | null,
37
+ "source": "package.json" | "Cargo.toml" | "pyproject.toml" | "pom.xml" | "VERSION" | "CHANGELOG" | "git_tag" | null,
38
+ "reasoning": "<one sentence>"
39
+ }`;
40
+ }
41
/**
 * Parse the agent's snapshot-detection reply into a normalized result.
 *
 * Accepts a bare JSON object, JSON wrapped in a Markdown code fence, or JSON
 * embedded in surrounding text (located with `findBalancedJsonObject`).
 *
 * @param {string} raw - Raw text produced by the agent.
 * @returns {{ snapshot_tag: string | null, source: string | null, reasoning: string }}
 *   `snapshot_tag` is the trimmed tag or `null` when absent/blank/non-string;
 *   `source` is `null` unless a string was supplied; `reasoning` falls back to
 *   a placeholder when missing.
 * @throws {Error} When no JSON object can be located, the located text is not
 *   valid JSON, or the payload is not a plain JSON object (primitives, `null`
 *   and arrays are rejected).
 */
export function parseSnapshotDetection(raw) {
    const trimmed = raw.trim();
    // Prefer the contents of the first ``` / ```json fence when one is present.
    const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    const body = fenced ? fenced[1].trim() : trimmed;
    let parsed;
    try {
        parsed = JSON.parse(body);
    }
    catch {
        // The reply is not pure JSON: fall back to the first balanced {...} span.
        const object = findBalancedJsonObject(body);
        if (!object) {
            throw new Error('No JSON object found in snapshot-detection output');
        }
        parsed = JSON.parse(object);
    }
    // `typeof [] === 'object'`, so arrays must be excluded explicitly; otherwise
    // a reply such as `[]` would be reported as a valid "no snapshot" result.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new Error('Snapshot detection returned non-object response');
    }
    const { snapshot_tag: rawTag, source: rawSource, reasoning: rawReasoning } = parsed;
    const trimmedTag = typeof rawTag === 'string' ? rawTag.trim() : '';
    return {
        snapshot_tag: trimmedTag.length > 0 ? trimmedTag : null,
        source: typeof rawSource === 'string' ? rawSource : null,
        reasoning: typeof rawReasoning === 'string' ? rawReasoning : '(no reasoning given)',
    };
}
70
/**
 * Loose sanity check on a proposed snapshot tag.
 *
 * Rejects candidates that are the same version as the latest release (exact
 * match, or differing only by a leading "v"/"V" before the version number)
 * and candidates that git would refuse as a ref name or that contain
 * characters likely to break downstream git / GitHub API calls.
 *
 * @param {string} candidate - Tag proposed by the detection agent.
 * @param {string} latestReleaseTag - Tag of the most recent shipped release.
 * @returns {boolean} `true` when the candidate looks usable as a new tag.
 */
export function isPlausibleSnapshotTag(candidate, latestReleaseTag) {
    // Non-strings cannot be tags; answer `false` rather than throwing.
    if (typeof candidate !== 'string') {
        return false;
    }
    if (candidate.length === 0 || candidate.length > 100) {
        return false;
    }
    // "1.2.3" and "v1.2.3" name the same release, so compare without the prefix.
    const withoutVersionPrefix = (tag) => (typeof tag === 'string' ? tag.replace(/^v(?=\d)/i, '') : tag);
    if (candidate === latestReleaseTag ||
        withoutVersionPrefix(candidate) === withoutVersionPrefix(latestReleaseTag)) {
        return false;
    }
    // Allow-list of characters; this also excludes whitespace and control chars.
    if (!/^[A-Za-z0-9._\-+/@]+$/.test(candidate)) {
        return false;
    }
    // Ref-name rules: no leading "-", "." or "/", and no trailing "." or "/".
    if (/^[-./]/.test(candidate) || /[./]$/.test(candidate)) {
        return false;
    }
    if (candidate === '@' ||
        candidate.includes('..') ||
        candidate.includes('//') ||
        candidate.includes('@{')) {
        return false;
    }
    // No slash-separated component may start with "." or end with ".lock".
    return candidate
        .split('/')
        .every((component) => !component.startsWith('.') && !component.endsWith('.lock'));
}
90
/**
 * Wrap prompt content in the user-message envelope yielded to the agent SDK.
 *
 * @param content - Message content, stored unchanged under `message.content`.
 * @returns An object of the form `{ type: 'user', message: { role: 'user', content } }`.
 */
function userMessage(content) {
    const message = { role: 'user', content };
    return { type: 'user', message };
}
93
/**
 * Async generator that yields exactly one user message built from `text`.
 *
 * @param text - Prompt text handed to `userMessage`.
 */
// eslint-disable-next-line @typescript-eslint/require-await -- async generator required by SDK interface
async function* makePrompt(text) {
    const onlyMessage = userMessage(text);
    yield onlyMessage;
}
97
/**
 * Ask the agent whether the repository at `cwd` is preparing a version newer
 * than `latestReleaseTag`, and return the validated detection.
 *
 * Streams messages from `query`, accumulating assistant text as a fallback in
 * case the final `result` message carries no text. The detection is parsed
 * from each `result` message; parse failures are logged (when verbose) and
 * otherwise ignored.
 *
 * @param options - Reads `cwd`, `latestReleaseTag`, `config` (only
 *   `config.model` is used, falling back to `DEFAULT_MODEL`) and `verbose`.
 * @returns The parsed detection; a `snapshot_tag: null` placeholder when
 *   nothing could be parsed; or the detection with `snapshot_tag` reset to
 *   `null` when the proposed tag fails `isPlausibleSnapshotTag`.
 */
// eslint-disable-next-line complexity -- agent loop with message-type handling
export async function detectSnapshotVersion(options) {
    const { cwd, latestReleaseTag, config, verbose } = options;
    if (verbose) {
        logInfo(`Detecting snapshot version ahead of ${latestReleaseTag}...`);
    }
    const agentStream = query({
        prompt: makePrompt(buildSnapshotDetectionPrompt(latestReleaseTag)),
        options: {
            systemPrompt: {
                type: 'preset',
                preset: 'claude_code',
            },
            model: config.model || DEFAULT_MODEL,
            maxTurns: 10,
            permissionMode: 'bypassPermissions',
            cwd,
        },
    });
    let transcript = '';
    let parsedDetection = null;
    for await (const event of agentStream) {
        if (event.type === 'assistant' && event.message?.content) {
            for (const part of event.message.content) {
                if (part.type !== 'text') {
                    continue;
                }
                transcript += `${part.text}\n`;
                logDebug(part.text, verbose);
            }
        }
        if (event.type !== 'result') {
            continue;
        }
        // Prefer the SDK's final result text; otherwise use what the assistant said.
        const replyText = event.result || transcript;
        try {
            parsedDetection = parseSnapshotDetection(replyText);
        }
        catch (err) {
            if (verbose) {
                logDebug(`Snapshot detection parse error: ${err instanceof Error ? err.message : String(err)}`, verbose);
            }
        }
    }
    if (!parsedDetection) {
        return {
            snapshot_tag: null,
            source: null,
            reasoning: 'Could not parse detection response',
        };
    }
    // Validate the proposed tag before returning.
    const proposedTag = parsedDetection.snapshot_tag;
    const tagAccepted = proposedTag === null || isPlausibleSnapshotTag(proposedTag, latestReleaseTag);
    if (tagAccepted) {
        return parsedDetection;
    }
    if (verbose) {
        logInfo(`Rejecting implausible snapshot tag: ${JSON.stringify(proposedTag)}`);
    }
    return {
        ...parsedDetection,
        snapshot_tag: null,
    };
}
@@ -24,9 +24,8 @@ async function* prompt(analysisPrompt) {
24
24
  setTimeout(res, 10000);
25
25
  });
26
26
  }
27
- export const generateTechnicalDesign = async (options, config, checklistContext
28
27
  // eslint-disable-next-line complexity
29
- ) => {
28
+ export const generateTechnicalDesign = async (options, config, checklistContext) => {
30
29
  const { featureId, verbose } = options;
31
30
  if (verbose) {
32
31
  logInfo(`Starting technical design generation for feature ID: ${featureId}`);
@@ -6,9 +6,8 @@ import { executeTestCasesAnalysisQuery, parseAnalysisResult } from './agent.js';
6
6
  import { prepareTestCasesAnalysisContext } from './context.js';
7
7
  import { buildTestCasesAnalysisResult, deleteSpecificTestCases, deleteTestCaseArtifacts, getAllDraftTestCaseIds, resetReadyTestCasesToDraft, saveTestCasesAsDraft, updateTestCasesToReady, } from './outcome.js';
8
8
  import { createTestCasesAnalysisSystemPrompt } from './prompts.js';
9
- export const analyseTestCases = async (options, config, checklistContext
10
9
  // eslint-disable-next-line complexity
11
- ) => {
10
+ export const analyseTestCases = async (options, config, checklistContext) => {
12
11
  const { featureId, verbose } = options;
13
12
  if (verbose) {
14
13
  logInfo(`Starting test cases analysis for feature ID: ${featureId}`);
@@ -6,9 +6,8 @@ import { executeUserStoriesAnalysisQuery, parseAnalysisResult, } from './agent.j
6
6
  import { prepareUserStoriesAnalysisContext } from './context.js';
7
7
  import { buildUserStoriesAnalysisResult, deleteSpecificUserStories, deleteUserStoryArtifacts, getAllDraftUserStoryIds, resetReadyUserStoriesToDraft, saveUserStoriesAsDraft, updateUserStoriesToReady, } from './outcome.js';
8
8
  import { createUserStoriesAnalysisSystemPrompt } from './prompts.js';
9
- export const analyseUserStories = async (options, config, checklistContext
10
9
  // eslint-disable-next-line complexity
11
- ) => {
10
+ export const analyseUserStories = async (options, config, checklistContext) => {
12
11
  const { featureId, verbose } = options;
13
12
  if (verbose) {
14
13
  logInfo(`Starting user stories analysis for feature ID: ${featureId}`);
@@ -0,0 +1,74 @@
1
+ import assert from 'node:assert';
2
+ import { describe, it } from 'node:test';
3
+ import { buildImprovementPrompt, } from '../coaching-agent.js';
4
// Suite: buildImprovementPrompt must surface the rating, the coaching
// reasoning, the focus areas, the suggestions and the weaknesses in its output.
void describe('buildImprovementPrompt', () => {
    const rating = {
        score: 72,
        summary: 'Decent but incomplete',
        criteria_scores: {
            completeness: { score: 60, reason: 'Missing sections' },
            accuracy: { score: 75, reason: 'Minor issues' },
            quality: { score: 82, reason: 'Good structure' },
        },
        strengths: ['Clean structure'],
        weaknesses: ['Missing error handling', 'No migration plan'],
    };
    const decision = {
        shouldContinue: true,
        reasoning: 'Completeness is weak, needs more coverage',
        focusAreas: ['completeness', 'accuracy'],
        suggestions: [
            'Add error handling section',
            'Include database migration plan',
        ],
    };
    // Asserts, in order, that each fragment occurs in the generated prompt.
    const assertMentions = (promptText, fragments) => {
        for (const fragment of fragments) {
            assert.ok(promptText.includes(fragment));
        }
    };
    void it('includes current score and target', () => {
        assertMentions(buildImprovementPrompt(decision, rating), ['72/100', '100/100']);
    });
    void it('includes coaching reasoning', () => {
        assertMentions(buildImprovementPrompt(decision, rating), [
            'Completeness is weak, needs more coverage',
        ]);
    });
    void it('includes focus areas with scores', () => {
        assertMentions(buildImprovementPrompt(decision, rating), [
            'completeness',
            '60/100',
            'accuracy',
            '75/100',
        ]);
    });
    void it('includes specific suggestions', () => {
        assertMentions(buildImprovementPrompt(decision, rating), [
            'Add error handling section',
            'Include database migration plan',
        ]);
    });
    void it('includes weaknesses to address', () => {
        assertMentions(buildImprovementPrompt(decision, rating), [
            'Missing error handling',
            'No migration plan',
        ]);
    });
    void it('handles empty focus areas gracefully', () => {
        const withoutFocus = {
            shouldContinue: true,
            reasoning: 'General improvement needed',
            focusAreas: [],
            suggestions: ['Improve overall'],
        };
        assertMentions(buildImprovementPrompt(withoutFocus, rating), [
            'General improvement needed',
            'Improve overall',
        ]);
    });
    void it('handles focus areas not present in criteria_scores', () => {
        const withUnknownFocus = {
            shouldContinue: true,
            reasoning: 'test',
            focusAreas: ['nonexistent_criterion'],
            suggestions: ['test'],
        };
        // Must not crash; a missing score is expected to render as '?'.
        assertMentions(buildImprovementPrompt(withUnknownFocus, rating), [
            'nonexistent_criterion',
            '?/100',
        ]);
    });
});
@@ -0,0 +1,59 @@
1
+ import assert from 'node:assert';
2
+ import { describe, it } from 'node:test';
3
+ import { isStagnant } from '../coaching-loop.js';
4
/**
 * Build a minimal rating fixture carrying only a score.
 *
 * @param score - Value stored as `score` and echoed in `summary`.
 * @returns A rating object with empty criteria, strengths and weaknesses.
 */
function makeRating(score) {
    const summary = `Score: ${score}`;
    return { score, summary, criteria_scores: {}, strengths: [], weaknesses: [] };
}
13
// Suite: isStagnant over rating histories. The inline comments in the cases
// below assume MIN_IMPROVEMENT = 3 and a stagnation window of 3 ratings.
void describe('isStagnant', () => {
    // Turns a list of scores into a rating history.
    const historyOf = (...scores) => scores.map((score) => makeRating(score));
    void it('returns false when fewer than 3 ratings', () => {
        for (const history of [historyOf(), historyOf(50), historyOf(50, 52)]) {
            assert.strictEqual(isStagnant(history), false);
        }
    });
    void it('returns true when 3 ratings with < 3 point improvement', () => {
        assert.strictEqual(isStagnant(historyOf(80, 81, 82)), true);
    });
    void it('returns true when 3 ratings with zero improvement', () => {
        assert.strictEqual(isStagnant(historyOf(85, 85, 85)), true);
    });
    void it('returns false when improvement >= 3 points', () => {
        // 73 - 70 = 3 and 3 < 3 is false, so this history is not stagnant.
        assert.strictEqual(isStagnant(historyOf(70, 72, 73)), false);
    });
    void it('returns false when improvement exactly equals threshold', () => {
        // Same boundary: the improvement (3) equals the threshold (3).
        assert.strictEqual(isStagnant(historyOf(70, 72, 73)), false);
    });
    void it('returns false when improvement exceeds threshold', () => {
        assert.strictEqual(isStagnant(historyOf(60, 65, 70)), false);
    });
    void it('only looks at last STAGNATION_WINDOW ratings', () => {
        // Big early jumps, but the last three (70, 71, 72) improve by only 2.
        assert.strictEqual(isStagnant(historyOf(30, 50, 70, 71, 72)), true);
    });
    void it('handles decreasing scores as stagnant', () => {
        // 79 - 80 = -1, which is below the threshold.
        assert.strictEqual(isStagnant(historyOf(80, 78, 79)), true);
    });
});
@@ -0,0 +1,188 @@
1
+ import assert from 'node:assert';
2
+ import { describe, it } from 'node:test';
3
+ import { buildSelfRatingPrompt, getCriteriaForPhase, parseSelfRating, } from '../self-rating.js';
4
// Suite: getCriteriaForPhase returns the expected criterion names per phase.
void describe('getCriteriaForPhase', () => {
    // [test title, phase, criteria that must be present]
    const perPhaseCases = [
        ['returns analysis criteria for feature_analysis', 'feature_analysis', ['coverage', 'clarity', 'completeness']],
        ['returns analysis criteria for user_stories_analysis', 'user_stories_analysis', ['coverage']],
        ['returns analysis criteria for test_cases_analysis', 'test_cases_analysis', ['clarity']],
        ['returns design criteria for technical_design', 'technical_design', ['feasibility', 'scalability', 'maintainability']],
        ['returns design criteria for branch_planning', 'branch_planning', ['feasibility']],
        ['returns code criteria for code_implementation', 'code_implementation', ['readability', 'testability', 'performance', 'security']],
        ['returns code criteria for code_refine', 'code_refine', ['readability']],
        ['returns common criteria for autonomous', 'autonomous', ['completeness', 'accuracy', 'quality', 'consistency']],
    ];
    for (const [title, phase, expectedNames] of perPhaseCases) {
        void it(title, () => {
            const criteria = getCriteriaForPhase(phase);
            for (const name of expectedNames) {
                assert.ok(criteria.includes(name));
            }
        });
    }
    void it('all criteria sets include common criteria', () => {
        const commonNames = ['completeness', 'accuracy', 'quality', 'consistency'];
        const phases = [
            'feature_analysis',
            'technical_design',
            'code_implementation',
            'autonomous',
        ];
        for (const phase of phases) {
            const criteria = getCriteriaForPhase(phase);
            for (const name of commonNames) {
                assert.ok(criteria.includes(name), `${phase} missing ${name}`);
            }
        }
    });
});
63
// Suite: buildSelfRatingPrompt output contains the expected literal fragments.
void describe('buildSelfRatingPrompt', () => {
    // [test title, phase, fragments that must appear in the prompt]
    const promptCases = [
        ['includes phase-specific criteria', 'technical_design', ['"feasibility"', '"scalability"', '"maintainability"']],
        ['includes JSON structure template', 'code_implementation', ['"self_rating"', '"score"', '"criteria_scores"', '"strengths"', '"weaknesses"']],
        ['asks for 0-100 scores', 'feature_analysis', ['0-100']],
    ];
    for (const [title, phase, fragments] of promptCases) {
        void it(title, () => {
            const prompt = buildSelfRatingPrompt(phase);
            for (const fragment of fragments) {
                assert.ok(prompt.includes(fragment));
            }
        });
    }
});
83
// Suite: parseSelfRating accepts fenced or raw JSON, rounds the score, rejects
// out-of-range scores and non-JSON input, and defaults missing optional fields.
void describe('parseSelfRating', () => {
    // Serialises a self-rating payload inside the `self_rating` envelope.
    const envelope = (selfRating) => JSON.stringify({ self_rating: selfRating });
    // Envelope whose only interesting field is the score.
    const scoreOnly = (score) => envelope({
        score,
        summary: 'test',
        criteria_scores: {},
        strengths: [],
        weaknesses: [],
    });
    void it('parses valid JSON in markdown code block', () => {
        const response = [
            'Here is my self-rating:',
            '',
            '```json',
            '{',
            '"self_rating": {',
            '"score": 85,',
            '"summary": "Good coverage but some gaps",',
            '"criteria_scores": {',
            '"completeness": { "score": 90, "reason": "Covered all cases" },',
            '"accuracy": { "score": 80, "reason": "Minor issues" }',
            '},',
            '"strengths": ["Good structure", "Clear naming"],',
            '"weaknesses": ["Missing edge cases"]',
            '}',
            '}',
            '```',
        ].join('\n');
        const rating = parseSelfRating(response, 'technical_design');
        assert.ok(rating);
        assert.strictEqual(rating.score, 85);
        assert.strictEqual(rating.summary, 'Good coverage but some gaps');
        assert.strictEqual(rating.criteria_scores.completeness.score, 90);
        assert.strictEqual(rating.criteria_scores.accuracy.score, 80);
        assert.deepStrictEqual(rating.strengths, ['Good structure', 'Clear naming']);
        assert.deepStrictEqual(rating.weaknesses, ['Missing edge cases']);
    });
    void it('parses raw JSON without code block', () => {
        const response = envelope({
            score: 72,
            summary: 'Needs work',
            criteria_scores: {},
            strengths: [],
            weaknesses: ['Poor coverage'],
        });
        const rating = parseSelfRating(response, 'feature_analysis');
        assert.ok(rating);
        assert.strictEqual(rating.score, 72);
        assert.strictEqual(rating.summary, 'Needs work');
    });
    void it('rounds score to integer', () => {
        const rating = parseSelfRating(scoreOnly(85.7), 'technical_design');
        assert.ok(rating);
        assert.strictEqual(rating.score, 86);
    });
    void it('returns null for invalid score above 100', () => {
        assert.strictEqual(parseSelfRating(scoreOnly(150), 'technical_design'), null);
    });
    void it('returns null for negative score', () => {
        assert.strictEqual(parseSelfRating(scoreOnly(-5), 'technical_design'), null);
    });
    void it('returns null for non-JSON response', () => {
        assert.strictEqual(parseSelfRating('This is not JSON at all', 'technical_design'), null);
    });
    void it('returns null for empty response', () => {
        assert.strictEqual(parseSelfRating('', 'technical_design'), null);
    });
    void it('handles missing optional fields gracefully', () => {
        const response = envelope({
            score: 60,
            criteria_scores: { completeness: { score: 60, reason: 'ok' } },
        });
        const rating = parseSelfRating(response, 'technical_design');
        assert.ok(rating);
        assert.strictEqual(rating.score, 60);
        assert.strictEqual(rating.summary, '');
        assert.deepStrictEqual(rating.strengths, []);
        assert.deepStrictEqual(rating.weaknesses, []);
        assert.strictEqual(rating.criteria_scores.completeness.score, 60);
    });
});
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Unit tests for phase quality criteria definitions
3
+ */
4
+ export {};
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Unit tests for phase quality criteria definitions
3
+ */
4
+ import { describe, it } from 'node:test';
5
+ import assert from 'node:assert';
6
+ import { DEFAULT_PHASE_CRITERIA, USER_STORIES_ANALYSIS_CRITERIA, TEST_CASES_ANALYSIS_CRITERIA, TECHNICAL_DESIGN_CRITERIA, BRANCH_PLANNING_CRITERIA, CODE_IMPLEMENTATION_CRITERIA, FUNCTIONAL_TESTING_CRITERIA, CODE_REVIEW_CRITERIA, getPhaseQualityCriteria, } from '../phase-criteria.js';
7
+ describe('Phase Quality Criteria', () => {
8
+ describe('DEFAULT_PHASE_CRITERIA', () => {
9
+ it('should cover all evaluable phases', () => {
10
+ const expectedPhases = [
11
+ 'user_stories_analysis',
12
+ 'test_cases_analysis',
13
+ 'technical_design',
14
+ 'branch_planning',
15
+ 'code_implementation',
16
+ 'functional_testing',
17
+ 'code_review',
18
+ ];
19
+ for (const phase of expectedPhases) {
20
+ assert.ok(phase in DEFAULT_PHASE_CRITERIA, `Should have criteria for phase: ${phase}`);
21
+ }
22
+ });
23
+ it('should have consistent phase names in criteria objects', () => {
24
+ for (const [key, criteria] of Object.entries(DEFAULT_PHASE_CRITERIA)) {
25
+ assert.strictEqual(criteria.phase, key, `Criteria for ${key} should have matching phase name`);
26
+ }
27
+ });
28
+ });
29
+ describe('Criteria Structural Validity', () => {
30
+ const allCriteria = [
31
+ USER_STORIES_ANALYSIS_CRITERIA,
32
+ TEST_CASES_ANALYSIS_CRITERIA,
33
+ TECHNICAL_DESIGN_CRITERIA,
34
+ BRANCH_PLANNING_CRITERIA,
35
+ CODE_IMPLEMENTATION_CRITERIA,
36
+ FUNCTIONAL_TESTING_CRITERIA,
37
+ CODE_REVIEW_CRITERIA,
38
+ ];
39
+ for (const phaseCriteria of allCriteria) {
40
+ describe(phaseCriteria.phase, () => {
41
+ it('should have advanceThreshold > escalateThreshold', () => {
42
+ assert.ok(phaseCriteria.advanceThreshold > phaseCriteria.escalateThreshold, `advanceThreshold (${phaseCriteria.advanceThreshold}) should be > escalateThreshold (${phaseCriteria.escalateThreshold})`);
43
+ });
44
+ it('should have thresholds in valid range (0-100)', () => {
45
+ assert.ok(phaseCriteria.advanceThreshold >= 0);
46
+ assert.ok(phaseCriteria.advanceThreshold <= 100);
47
+ assert.ok(phaseCriteria.escalateThreshold >= 0);
48
+ assert.ok(phaseCriteria.escalateThreshold <= 100);
49
+ });
50
+ it('should have maxAutoRetries >= 1', () => {
51
+ assert.ok(phaseCriteria.maxAutoRetries >= 1, `maxAutoRetries should be >= 1, got ${phaseCriteria.maxAutoRetries}`);
52
+ });
53
+ it('should have at least one criterion', () => {
54
+ assert.ok(phaseCriteria.criteria.length > 0, 'Should have at least one criterion');
55
+ });
56
+ it('should have criteria weights that approximately sum to 1', () => {
57
+ const totalWeight = phaseCriteria.criteria.reduce((sum, c) => sum + c.weight, 0);
58
+ assert.ok(Math.abs(totalWeight - 1.0) < 0.01, `Weights should sum to ~1.0, got ${totalWeight}`);
59
+ });
60
+ it('should have unique criterion IDs', () => {
61
+ const ids = phaseCriteria.criteria.map((c) => c.id);
62
+ const uniqueIds = new Set(ids);
63
+ assert.strictEqual(ids.length, uniqueIds.size, 'Criterion IDs should be unique');
64
+ });
65
+ it('should have valid criterion weights (0 < weight <= 1)', () => {
66
+ for (const criterion of phaseCriteria.criteria) {
67
+ assert.ok(criterion.weight > 0 && criterion.weight <= 1, `Weight for ${criterion.id} should be between 0 and 1, got ${criterion.weight}`);
68
+ }
69
+ });
70
+ it('should have valid minimum scores (0-100)', () => {
71
+ for (const criterion of phaseCriteria.criteria) {
72
+ assert.ok(criterion.minimumScore >= 0 && criterion.minimumScore <= 100, `minimumScore for ${criterion.id} should be 0-100, got ${criterion.minimumScore}`);
73
+ }
74
+ });
75
+ it('should have non-empty evaluation guidance', () => {
76
+ for (const criterion of phaseCriteria.criteria) {
77
+ assert.ok(criterion.evaluationGuidance.length > 0, `Criterion ${criterion.id} should have evaluation guidance`);
78
+ }
79
+ });
80
+ });
81
+ }
82
+ });
83
+ describe('getPhaseQualityCriteria', () => {
84
+ it('should return default criteria for known phases', () => {
85
+ const criteria = getPhaseQualityCriteria('user_stories_analysis');
86
+ assert.ok(criteria);
87
+ assert.strictEqual(criteria.phase, 'user_stories_analysis');
88
+ assert.strictEqual(criteria.advanceThreshold, USER_STORIES_ANALYSIS_CRITERIA.advanceThreshold);
89
+ });
90
+ it('should return null for unknown phases', () => {
91
+ const criteria = getPhaseQualityCriteria('nonexistent_phase');
92
+ assert.strictEqual(criteria, null);
93
+ });
94
+ it('should apply overrides when provided', () => {
95
+ const criteria = getPhaseQualityCriteria('user_stories_analysis', {
96
+ user_stories_analysis: {
97
+ advanceThreshold: 90,
98
+ maxAutoRetries: 5,
99
+ },
100
+ });
101
+ assert.ok(criteria);
102
+ assert.strictEqual(criteria.advanceThreshold, 90);
103
+ assert.strictEqual(criteria.maxAutoRetries, 5);
104
+ // Non-overridden values should remain from defaults
105
+ assert.strictEqual(criteria.escalateThreshold, USER_STORIES_ANALYSIS_CRITERIA.escalateThreshold);
106
+ });
107
+ it('should return defaults when override map does not include the phase', () => {
108
+ const criteria = getPhaseQualityCriteria('technical_design', {
109
+ user_stories_analysis: { advanceThreshold: 90 },
110
+ });
111
+ assert.ok(criteria);
112
+ assert.strictEqual(criteria.advanceThreshold, TECHNICAL_DESIGN_CRITERIA.advanceThreshold);
113
+ });
114
+ it('should override criteria array when provided', () => {
115
+ const customCriteria = [
116
+ {
117
+ id: 'custom_1',
118
+ name: 'Custom',
119
+ description: 'A custom criterion',
120
+ weight: 1.0,
121
+ minimumScore: 50,
122
+ evaluationGuidance: 'Custom guidance',
123
+ },
124
+ ];
125
+ const criteria = getPhaseQualityCriteria('technical_design', {
126
+ technical_design: { criteria: customCriteria },
127
+ });
128
+ assert.ok(criteria);
129
+ assert.strictEqual(criteria.criteria.length, 1);
130
+ assert.strictEqual(criteria.criteria[0].id, 'custom_1');
131
+ });
132
+ });
133
+ });
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Unit tests for lifecycle agent transition rules and decision logic
3
+ */
4
+ export {};