gsd-lite 0.5.12 → 0.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,7 @@
13
13
  "name": "gsd",
14
14
  "source": "./",
15
15
  "description": "AI orchestration tool — GSD management shell + Superpowers quality core. 5 commands, 4 agents, 5 workflows, MCP server, context monitoring.",
16
- "version": "0.5.12",
16
+ "version": "0.5.14",
17
17
  "keywords": [
18
18
  "orchestration",
19
19
  "mcp",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gsd",
3
- "version": "0.5.12",
3
+ "version": "0.5.14",
4
4
  "description": "AI orchestration tool for Claude Code — GSD management shell + Superpowers quality core",
5
5
  "author": {
6
6
  "name": "sdsrss",
package/README.md CHANGED
@@ -17,7 +17,7 @@ GSD-Lite is an AI orchestration tool for [Claude Code](https://docs.anthropic.co
17
17
  ### Quality Discipline (Built-in, Not Optional)
18
18
  - **TDD enforcement** — "No production code without a failing test first" baked into every executor dispatch
19
19
  - **Anti-rationalization guards** — Red-flag checklists inline in every agent prompt, blocking common excuses to skip process
20
- - **Multi-level code review** — L0 self-review / L1 phase-batch review / L2 immediate independent review
20
+ - **Multi-level code review** — L0 self-review / L1 phase-batch review / L2 immediate independent review, plus a phase-review retry limit that hands control back to the user when exhausted
21
21
  - **Contract change propagation** — When an API contract changes, downstream tasks automatically invalidate
22
22
 
23
23
  ### Intelligent Failure Recovery
@@ -26,6 +26,12 @@ GSD-Lite is an AI orchestration tool for [Claude Code](https://docs.anthropic.co
26
26
  - **Blocked task handling** — Blocked tasks are parked; execution continues with remaining tasks
27
27
  - **Rework propagation** — Critical review issues cascade invalidation to dependent tasks
28
28
 
29
+ ### Adaptive Review & Parallel Execution
30
+ - **Confidence-based review adjustment** — Executor self-assesses confidence (high/medium/low); orchestrator auto-adjusts review level with evidence cross-validation
31
+ - **Impact analysis before review** — Reviewer runs impact analysis on multi-file changes to catch missed downstream effects
32
+ - **Parallel task scheduling** — Independent tasks within the same phase are identified for concurrent dispatch
33
+ - **Auto PR suggestion** — Phase/project completion prompts PR creation with evidence summary
34
+
29
35
  ### Context Protection
30
36
  - **Subagent isolation** — Each task runs in its own agent context, preventing cross-contamination
31
37
  - **StatusLine monitoring** — Real-time context health tracking via Claude Code StatusLine
@@ -243,7 +249,7 @@ gsd-lite/
243
249
  ├── references/ # 8 reference docs
244
250
  ├── hooks/ # Session lifecycle (StatusLine + PostToolUse + SessionStart + Stop + AutoUpdate)
245
251
  │ └── lib/ # Shared hook utilities (gsd-finder)
246
- ├── tests/ # 804 tests (unit + simulation + E2E)
252
+ ├── tests/ # 826 tests (unit + simulation + E2E)
247
253
  ├── cli.js # Install/uninstall CLI entry
248
254
  ├── install.js # Installation script
249
255
  └── uninstall.js # Uninstall script
@@ -252,8 +258,8 @@ gsd-lite/
252
258
  ## Testing
253
259
 
254
260
  ```bash
255
- npm test # Run all 804 tests
256
- npm run test:coverage # Tests + coverage report (94%+ lines, 81%+ branches)
261
+ npm test # Run all 826 tests
262
+ npm run test:coverage # Tests + coverage report (94%+ lines, 83%+ branches)
257
263
  npm run lint # Biome lint
258
264
  node --test tests/file.js # Run a single test file
259
265
  ```
@@ -55,6 +55,7 @@ tools: Read, Write, Edit, Bash, Grep, Glob
55
55
  "decisions": ["[DECISION] use optimistic locking by version column"],
56
56
  "blockers": [],
57
57
  "contract_changed": true,
58
+ "confidence": "high",
58
59
  "evidence": [
59
60
  {"id": "ev:test:users-update", "scope": "task:2.3"},
60
61
  {"id": "ev:typecheck:phase-2", "scope": "task:2.3"}
@@ -67,6 +68,13 @@ tools: Read, Write, Edit, Bash, Grep, Glob
67
68
  - 改了共享类型定义 / 接口 → true
68
69
  - 只改了内部实现逻辑、不影响外部调用方 → false
69
70
  - 拿不准时 → true (安全优先)
71
+
72
+ `confidence` 判定指南 (用于审查级别自动调整):
73
+ - "high" — 测试全通过 + 改动明确 + 无意外复杂度
74
+ - "medium" — 测试通过但有不确定性 (边界条件、并发、外部依赖)
75
+ - "low" — 有已知风险/跳过的测试/不确定的副作用
76
+ - 拿不准时 → "medium"
77
+ - 编排器会根据 confidence 自动升/降审查级别
70
78
  </result_contract>
71
79
 
72
80
  <uncertainty_handling>
@@ -58,12 +58,26 @@ L2 关键任务 → 单任务独立 review
58
58
  - 拿不准时 → 升一级处理
59
59
  </review_strategy>
60
60
 
61
+ <impact_analysis>
62
+ ## 审查前影响分析 (多文件变更时)
63
+
64
+ 当 `files_changed` 包含 3+ 文件,或涉及跨模块修改时:
65
+ 1. 使用 `code-graph-mcp impact <主要变更的函数/类名>` 分析影响范围
66
+ 2. 检查调用方是否都已被修改或兼容
67
+ 3. 将未覆盖的影响范围标注为 Critical issue
68
+
69
+ 这能发现 executor 遗漏的下游影响,是审查增值的关键步骤。
70
+ 单文件内部修改可跳过此步骤。
71
+ 如 `code-graph-mcp` 不可用,改用 Grep/Glob 手动追踪变更函数的调用方。
72
+ </impact_analysis>
73
+
61
74
  <stage_1_spec_review>
62
75
  检查代码是否符合任务规格:
63
76
  - 所有需求都实现了吗?
64
77
  - 有没有多余的实现 (YAGNI)?
65
78
  - 接口/API 是否符合计划?
66
79
  - 测试是否覆盖了需求中的每个场景?
80
+ - 影响分析发现的调用方是否都已适配?
67
81
  结果: ✅ 通过 / ❌ 列出不符合项 (附具体代码位置)
68
82
  </stage_1_spec_review>
69
83
 
@@ -6,24 +6,7 @@
6
6
  const fs = require('node:fs');
7
7
  const path = require('node:path');
8
8
  const os = require('node:os');
9
-
10
- /**
11
- * Walk from startDir up to filesystem root looking for a .gsd directory.
12
- * Returns the absolute path to .gsd if found, or null.
13
- */
14
- function findGsdDir(startDir) {
15
- let dir = startDir;
16
- while (true) {
17
- const candidate = path.join(dir, '.gsd');
18
- try {
19
- if (fs.statSync(candidate).isDirectory()) return candidate;
20
- } catch {
21
- const parent = path.dirname(dir);
22
- if (parent === dir) return null; // reached filesystem root
23
- dir = parent;
24
- }
25
- }
26
- }
9
+ const { findGsdDir } = require('./lib/gsd-finder.cjs');
27
10
 
28
11
  let input = '';
29
12
  const stdinTimeout = setTimeout(() => process.exit(0), 3000);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gsd-lite",
3
- "version": "0.5.12",
3
+ "version": "0.5.14",
4
4
  "description": "AI orchestration tool for Claude Code — GSD management shell + Superpowers quality core",
5
5
  "type": "module",
6
6
  "bin": {
package/src/schema.js CHANGED
@@ -586,6 +586,10 @@ export function validateExecutorResult(r) {
586
586
  if (r.outcome === 'checkpointed' && typeof r.checkpoint_commit !== 'string') {
587
587
  errors.push('checkpointed outcome requires checkpoint_commit');
588
588
  }
589
+ // confidence is optional; when present must be one of the valid values
590
+ if ('confidence' in r && !['high', 'medium', 'low'].includes(r.confidence)) {
591
+ errors.push('confidence must be "high", "medium", or "low"');
592
+ }
589
593
  return { valid: errors.length === 0, errors };
590
594
  }
591
595
 
@@ -16,6 +16,8 @@ export async function handleDebuggerResult({ result, basePath = process.cwd() }
16
16
  return { error: true, message: `Invalid debugger result: ${validation.errors.join('; ')}` };
17
17
  }
18
18
 
19
+ // Note: read() is outside the state lock — safe under single-session sequential execution.
20
+ // See executor.js for rationale.
19
21
  const state = await read({ basePath });
20
22
  if (state.error) return state;
21
23
  const { phase, task } = getPhaseAndTask(state, result.task_id);
@@ -20,6 +20,9 @@ export async function handleExecutorResult({ result, basePath = process.cwd() }
20
20
  return { error: true, message: `Invalid executor result: ${validation.errors.join('; ')}` };
21
21
  }
22
22
 
23
+ // Note: read() is outside the state lock. This is safe because the MCP server
24
+ // processes tool calls sequentially (single-session, promise-queue serialized).
25
+ // persist() below re-acquires the lock and applies changes atomically.
23
26
  const state = await read({ basePath });
24
27
  if (state.error) return state;
25
28
  const { phase, task } = getPhaseAndTask(state, result.task_id);
@@ -10,6 +10,7 @@ import { getGitHead, getGsdDir } from '../../utils.js';
10
10
  const MAX_DEBUG_RETRY = 3;
11
11
  const MAX_RESUME_DEPTH = 3;
12
12
  const CONTEXT_RESUME_THRESHOLD = 40;
13
+ const MAX_PHASE_REVIEW_RETRY = 5;
13
14
 
14
15
  // ── Result Contracts ──
15
16
  // Provided in dispatch responses so agents produce valid results on the first call.
@@ -23,6 +24,7 @@ const RESULT_CONTRACTS = {
23
24
  decisions: '{ id, title, rationale }[] — architectural decisions made',
24
25
  blockers: '{ description, type }[] — what blocked progress (when outcome="blocked")',
25
26
  contract_changed: 'boolean — true if external API/behavior contract changed',
27
+ confidence: '"high" | "medium" | "low" (optional) — executor self-assessed confidence; affects review level',
26
28
  evidence: '{ type, detail }[] — verification evidence (test results, lint, etc.)',
27
29
  },
28
30
  reviewer: {
@@ -423,6 +425,7 @@ export {
423
425
  MAX_DEBUG_RETRY,
424
426
  MAX_RESUME_DEPTH,
425
427
  CONTEXT_RESUME_THRESHOLD,
428
+ MAX_PHASE_REVIEW_RETRY,
426
429
  RESULT_CONTRACTS,
427
430
  isTerminalWorkflowMode,
428
431
  parseTimestamp,
@@ -1,6 +1,5 @@
1
1
  import { storeResearch } from '../state/index.js';
2
2
  import { validateResearcherResult } from '../../schema.js';
3
- import { resumeWorkflow } from './resume.js';
4
3
 
5
4
  export async function handleResearcherResult({ result, artifacts, decision_index, basePath = process.cwd() } = {}) {
6
5
  if (!result || typeof result !== 'object' || Array.isArray(result)) {
@@ -15,11 +14,10 @@ export async function handleResearcherResult({ result, artifacts, decision_index
15
14
  const persisted = await storeResearch({ result, artifacts, decision_index, basePath });
16
15
  if (persisted.error) return persisted;
17
16
 
18
- const resumed = await resumeWorkflow({ basePath });
19
- if (resumed.error) return resumed;
20
-
21
17
  return {
22
- ...resumed,
18
+ success: true,
19
+ action: 'research_stored',
20
+ workflow_mode: persisted.workflow_mode,
23
21
  stored_files: persisted.stored_files,
24
22
  decision_ids: persisted.decision_ids,
25
23
  research_warnings: persisted.warnings,
@@ -118,7 +118,16 @@ async function resumeExecutingTask(state, basePath) {
118
118
  }],
119
119
  });
120
120
  if (persistError) return persistError;
121
- return buildExecutorDispatch(state, phase, task);
121
+ const dispatch = buildExecutorDispatch(state, phase, task);
122
+ // Expose parallel-available tasks so callers can dispatch multiple subagents
123
+ if (selection.parallel_available?.length > 0) {
124
+ dispatch.parallel_available = selection.parallel_available.map(t => ({
125
+ id: t.id,
126
+ name: t.name,
127
+ level: t.level || 'L1',
128
+ }));
129
+ }
130
+ return dispatch;
122
131
  }
123
132
 
124
133
  if (selection.mode === 'trigger_review') {
@@ -179,12 +188,20 @@ async function resumeExecutingTask(state, basePath) {
179
188
  });
180
189
  if (advanceError) return advanceError;
181
190
  }
191
+ // Check if this is the last phase — suggest PR creation
192
+ const isLastPhase = phase.id === state.total_phases;
182
193
  return {
183
194
  success: true,
184
195
  action: 'complete_phase',
185
196
  workflow_mode: 'executing_task',
186
197
  phase_id: phase.id,
187
198
  message: 'All tasks accepted and review passed; phase ready for completion',
199
+ ...(isLastPhase ? {
200
+ pr_suggestion: {
201
+ recommended: true,
202
+ message: 'All phases complete. Consider creating a PR with `gh pr create`.',
203
+ },
204
+ } : {}),
188
205
  };
189
206
  }
190
207
 
@@ -366,6 +383,10 @@ export async function resumeWorkflow({ basePath = process.cwd(), _depth = 0, unb
366
383
  completed_phases: (state.phases || []).filter((phase) => phase.lifecycle === 'accepted').length,
367
384
  total_phases: state.total_phases,
368
385
  message: 'Workflow already completed',
386
+ pr_suggestion: {
387
+ recommended: true,
388
+ message: 'Project complete. Consider creating a PR with `gh pr create` if not already done.',
389
+ },
369
390
  };
370
391
  case 'failed': {
371
392
  const failedPhases = [];
@@ -1,6 +1,7 @@
1
1
  import { read } from '../state/index.js';
2
2
  import { validateReviewerResult } from '../../schema.js';
3
3
  import {
4
+ MAX_PHASE_REVIEW_RETRY,
4
5
  getCurrentPhase,
5
6
  getTaskById,
6
7
  persist,
@@ -15,6 +16,8 @@ export async function handleReviewerResult({ result, basePath = process.cwd() }
15
16
  return { error: true, message: `Invalid reviewer result: ${validation.errors.join('; ')}` };
16
17
  }
17
18
 
19
+ // Note: read() is outside the state lock — safe under single-session sequential execution.
20
+ // See executor.js for rationale.
18
21
  const state = await read({ basePath });
19
22
  if (state.error) return state;
20
23
 
@@ -70,6 +73,40 @@ export async function handleReviewerResult({ result, basePath = process.cwd() }
70
73
  const specFailed = result.spec_passed === false;
71
74
  const qualityFailed = result.quality_passed === false;
72
75
  const needsRework = hasCritical || specFailed || qualityFailed;
76
+
77
+ // Compute retry count once for both exhaustion check and state update
78
+ const currentRetryCount = phase.phase_review?.retry_count || 0;
79
+ const nextRetryCount = needsRework ? currentRetryCount + 1 : 0;
80
+
81
+ // Phase review retry limit: prevent infinite reviewing↔active cycles
82
+ if (needsRework && nextRetryCount > MAX_PHASE_REVIEW_RETRY) {
83
+ const persistError = await persist(basePath, {
84
+ workflow_mode: 'awaiting_user',
85
+ current_task: null,
86
+ current_review: {
87
+ scope: 'phase',
88
+ scope_id: phase.id,
89
+ stage: 'review_retry_exhausted',
90
+ retry_count: nextRetryCount,
91
+ },
92
+ phases: [{
93
+ id: phase.id,
94
+ lifecycle: phase.lifecycle === 'reviewing' ? 'active' : phase.lifecycle,
95
+ phase_review: { status: 'rework_required', retry_count: nextRetryCount },
96
+ }],
97
+ });
98
+ if (persistError) return persistError;
99
+
100
+ return {
101
+ success: true,
102
+ action: 'review_retry_exhausted',
103
+ workflow_mode: 'awaiting_user',
104
+ phase_id: phase.id,
105
+ retry_count: nextRetryCount,
106
+ message: `Phase ${phase.id} review failed ${nextRetryCount} times (limit: ${MAX_PHASE_REVIEW_RETRY}). User intervention required.`,
107
+ };
108
+ }
109
+
73
110
  const reviewStatus = needsRework ? 'rework_required' : 'accepted';
74
111
 
75
112
  // done is auto-recomputed by update() — no manual tracking needed
@@ -77,9 +114,7 @@ export async function handleReviewerResult({ result, basePath = process.cwd() }
77
114
  id: phase.id,
78
115
  phase_review: {
79
116
  status: reviewStatus,
80
- ...(needsRework
81
- ? { retry_count: (phase.phase_review?.retry_count || 0) + 1 }
82
- : { retry_count: 0 }),
117
+ retry_count: nextRetryCount,
83
118
  },
84
119
  todo: taskPatches,
85
120
  };
@@ -12,7 +12,6 @@ import {
12
12
  createInitialState,
13
13
  migrateState,
14
14
  } from '../../schema.js';
15
- import { runAll } from '../verify.js';
16
15
  import {
17
16
  ERROR_CODES,
18
17
  MAX_EVIDENCE_ENTRIES,
@@ -29,6 +28,11 @@ export async function init({ project, phases, research, force = false, basePath
29
28
  if (!project || typeof project !== 'string') {
30
29
  return { error: true, code: ERROR_CODES.INVALID_INPUT, message: 'project must be a non-empty string' };
31
30
  }
31
+ // Sanitize: strip HTML comment delimiters (could break marker-based CLAUDE.md injection) and cap length
32
+ project = project.replace(/<!--|-->/g, '').trim().slice(0, 200);
33
+ if (!project) {
34
+ return { error: true, code: ERROR_CODES.INVALID_INPUT, message: 'project name is empty after sanitization' };
35
+ }
32
36
  if (!Array.isArray(phases)) {
33
37
  return { error: true, code: ERROR_CODES.INVALID_INPUT, message: 'phases must be an array' };
34
38
  }
@@ -420,7 +424,14 @@ export async function phaseComplete({
420
424
  };
421
425
  }
422
426
 
423
- const verificationResult = verification || (run_verify ? await runAll(basePath) : null);
427
+ if (run_verify && !verification) {
428
+ return {
429
+ error: true,
430
+ code: ERROR_CODES.INVALID_INPUT,
431
+ message: 'run_verify requires verification results to be passed via the verification parameter; the state layer does not execute external tools',
432
+ };
433
+ }
434
+ const verificationResult = verification || null;
424
435
  const testsPassed = verificationResult
425
436
  ? verificationPassed(verificationResult)
426
437
  : phase.phase_handoff.tests_passed === true;
@@ -66,7 +66,10 @@ export function selectRunnableTask(phase, state, { maxRetry = DEFAULT_MAX_RETRY
66
66
  }
67
67
 
68
68
  if (runnableTasks.length > 0) {
69
- return { task: runnableTasks[0] };
69
+ return {
70
+ task: runnableTasks[0],
71
+ ...(runnableTasks.length > 1 ? { parallel_available: runnableTasks.slice(1) } : {}),
72
+ };
70
73
  }
71
74
 
72
75
  const awaitingReview = phase.todo.filter(t => t.lifecycle === 'checkpointed');
@@ -236,8 +239,9 @@ const SENSITIVE_KEYWORDS = /\b(auth|payment|security|public.?api|login|token|cre
236
239
 
237
240
  /**
238
241
  * Reclassify review level at runtime based on executor results.
239
- * Upgrades L1->L2 when contract_changed + sensitive keywords or [LEVEL-UP].
240
- * Never downgrades.
242
+ * Upgrades L1->L2 when: contract_changed + sensitive keywords, [LEVEL-UP], or low confidence.
243
+ * Downgrades L1->L0 when: confidence is high, no contract change, and evidence is non-empty with no failed test entry.
244
+ * Never downgrades L2/L3.
241
245
  */
242
246
  export function reclassifyReviewLevel(task, executorResult) {
243
247
  const currentLevel = task.level || 'L1';
@@ -259,6 +263,25 @@ export function reclassifyReviewLevel(task, executorResult) {
259
263
  return 'L2';
260
264
  }
261
265
 
266
+ // Confidence-based adjustment: low confidence upgrades L1 → L2
267
+ if (executorResult.confidence === 'low' && currentLevel === 'L1') {
268
+ return 'L2';
269
+ }
270
+
271
+ // High confidence on non-sensitive L1 tasks → downgrade to L0 (self-review sufficient)
272
+ // Cross-validate: require objective evidence before trusting self-reported confidence.
273
+ // Without evidence or with failed tests, confidence claim is not credible.
274
+ if (executorResult.confidence === 'high' && currentLevel === 'L1'
275
+ && !executorResult.contract_changed) {
276
+ const hasEvidence = Array.isArray(executorResult.evidence) && executorResult.evidence.length > 0;
277
+ const hasTestFailure = Array.isArray(executorResult.evidence)
278
+ && executorResult.evidence.some(e => e && e.type === 'test' && e.passed === false);
279
+ if (hasEvidence && !hasTestFailure) {
280
+ return 'L0';
281
+ }
282
+ // Insufficient evidence or test failure — stay at L1 despite high confidence claim
283
+ }
284
+
262
285
  return currentLevel;
263
286
  }
264
287
 
package/src/utils.js CHANGED
@@ -65,6 +65,7 @@ const LOCK_MAX_RETRIES = 100; // 5 seconds total
65
65
  */
66
66
  export async function withFileLock(lockPath, fn) {
67
67
  let acquired = false;
68
+ let nonLockError = false;
68
69
  for (let i = 0; i < LOCK_MAX_RETRIES; i++) {
69
70
  try {
70
71
  await writeFile(lockPath, String(process.pid), { flag: 'wx' });
@@ -84,11 +85,21 @@ export async function withFileLock(lockPath, fn) {
84
85
  }
85
86
  await new Promise(r => setTimeout(r, LOCK_RETRY_MS));
86
87
  } else {
87
- break; // Non-EEXIST error — proceed without lock
88
+ // Non-EEXIST error (e.g., read-only fs) — proceed without lock
89
+ nonLockError = true;
90
+ break;
88
91
  }
89
92
  }
90
93
  }
91
94
 
95
+ // Lock exhaustion (retries depleted while another process held the lock):
96
+ // throw to prevent concurrent unlocked writes that cause data corruption.
97
+ // Non-EEXIST errors (read-only fs, permission denied) still proceed without lock
98
+ // since locking is physically impossible in those environments.
99
+ if (!acquired && !nonLockError) {
100
+ throw new Error(`Lock acquisition timeout: could not acquire ${lockPath} after ${LOCK_MAX_RETRIES} retries (${LOCK_MAX_RETRIES * LOCK_RETRY_MS}ms)`);
101
+ }
102
+
92
103
  try {
93
104
  return await fn();
94
105
  } finally {