agentplane 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/assets/AGENTS.md CHANGED
@@ -20,7 +20,7 @@ Detailed procedures live in canonical modules from `## CANONICAL DOCS`.
20
20
 
21
21
  - Repository type: user project initialized with `agentplane`.
22
22
  - CLI rule: prefer `ap` for compact agent-oriented commands; fall back to `agentplane`; if neither is available, stop and request installation guidance (do not invent repo-local entrypoints).
23
- - Startup shortcut: run `## COMMANDS -> Preflight`, then use `ap quickstart`; activate `ap role ORCHESTRATOR` for planning and `ap role <ROLE>` for the active owner before owner-scoped execution; then apply `## LOAD RULES` before any mutation. The guarded route is determined by `workflow.mode` in `.agentplane/WORKFLOW.md`; use `ap quickstart` as the canonical summary of the active path before mutating. In `branch_pr`, start from `ap work start ... --worktree`; in `direct`, stay in the current checkout and use the task lifecycle route.
23
+ - Startup shortcut: run `## COMMANDS -> Preflight`, then use `ap quickstart`; activate `ap role ORCHESTRATOR` for planning and `ap role <ROLE>` for the active owner before owner-scoped execution; then apply `## LOAD RULES` before any mutation. The guarded route is determined by `workflow.mode` in `.agentplane/WORKFLOW.md`; use `ap task brief <task-id>` and the emitted next command before manually assembling route commands. In `branch_pr`, start from the emitted `work start` route command or `ap work start ... --worktree`; in `direct`, stay in the current checkout and use the task lifecycle route.
24
24
 
25
25
  <!-- /ap:fragment -->
26
26
  <!-- ap:fragment id="gateway.agents.source_of_truth.sources.of.truth" slot="source_of_truth" mutability="replaceable" -->
@@ -85,7 +85,7 @@ git commit -m "Implement <task>"
85
85
  ap task verify-show <task-id>
86
86
  ap pr open <task-id> --branch task/<task-id>/<slug> --author <ROLE>
87
87
  ap verify <task-id> --ok|--rework --by <ROLE> --note "..."
88
- ap verify <task-id> --ok|--rework --by EVALUATOR --note "..." # verify --by EVALUATOR
88
+ ap evaluator run <task-id> --verdict pass|rework|blocked|human_review --summary "..." --finding "..." --evidence <path-or-check>
89
89
  ap integrate <task-id> --branch task/<task-id>/<slug> --run-verify
90
90
  ap finish <task-id> --author INTEGRATOR --body "Verified: ..." --result "..." --commit <git-rev> --close-commit
91
91
  ```
@@ -95,6 +95,7 @@ ap finish <task-id> --author INTEGRATOR --body "Verified: ..." --result "..." --
95
95
  ```bash
96
96
  ap vshow <task-id>
97
97
  ap verify <task-id> --ok|--rework --by <ROLE> --note "..." [--observation "..." --impact "..." --resolution "..."] [--local-only]
98
+ ap evaluator run <task-id> --verdict pass|rework|blocked|human_review --summary "..." --finding "..." --evidence <path-or-check> [--missing-test "..." --hidden-assumption "..." --residual-risk "..."]
98
99
  ap incidents advise <task-id>
99
100
  ap incidents collect <task-id> --check
100
101
  ap doctor
@@ -116,7 +117,7 @@ node .agentplane/policy/check-routing.mjs
116
117
  ## SHARED PROMPT CONTRACT
117
118
 
118
119
  - Outcome-first, concise, evidence-first: state goal, success criteria, constraints, stop rules, and output; use procedure only for command contracts, state machines, or irreversible gates; ask one narrow question only when missing information changes scope, task graph, security, or irreversible action.
119
- - Retrieval/progress/cache: preamble before multi-step or tool-heavy work; load only matched policy, task README, Verify Steps, and relevant files; use incidents only for analogous scope/tags; final output names actions, checks, blockers/drift, and next approval; keep stable gateway/policy/role before dynamic context and never cache mutable task state.
120
+ - Retrieval/progress/cache: preamble before multi-step or tool-heavy work; use `ap task active` and `ap task brief <task-id>` before manually combining task docs, route status, Verify Steps, PR metadata, and policy notes; load only matched policy, task README, Verify Steps, and relevant files; use incidents only for analogous scope/tags; final output names actions, checks, blockers/drift, and next approval; keep stable gateway/policy/role before dynamic context and never cache mutable task state.
120
121
 
121
122
  <!-- /ap:fragment -->
122
123
  <!-- ap:fragment id="gateway.user.instructions" slot="body" mutability="append_only" -->
@@ -8,19 +8,19 @@
8
8
  "reference.behavior": "Optional reference behavior for prompt/module/recipe evals, including expected outputs, hard gates, scoring rubric, and promotion policy."
9
9
  },
10
10
  "outputs": {
11
- "verdict": "One of pass, rework, or blocked, with the criteria and evidence that determined the result.",
11
+ "verdict": "One of pass, rework, blocked, or human_review, with the criteria and evidence that determined the result.",
12
12
  "rework.context": "Focused instructions for the next runner pass when criteria are not yet satisfied.",
13
- "quality.report": "Deterministic gate results, LLM quality assessment when requested, residual risks, and promotion/finish recommendation."
13
+ "quality.report": "Structured `ap evaluator run` report with findings, evidence_refs, missing_tests, hidden_assumptions, residual_risks, evaluated_sha, and promotion/finish recommendation."
14
14
  },
15
15
  "permissions": {
16
16
  "review.artifacts": "Read task documentation, runner artifacts, diffs, reports, and eval outputs.",
17
- "task.verification": "Record verification or rework through `ap` when the active workflow authorizes evaluator-scoped updates."
17
+ "task.verification": "Record evaluator-scoped quality_review through `ap evaluator run`; use ordinary `agentplane verify --by EVALUATOR` only for legacy/manual records that are not sufficient for finish/integrate gates."
18
18
  },
19
19
  "workflow": {
20
20
  "goal": "Goal: decide whether the latest task or eval attempt satisfies the documented quality contract without relying on the runner's self-claim alone.",
21
- "success.criteria": "Success criteria: required task sections and Verify Steps are mapped to concrete evidence; result manifest and artifacts are structurally valid; hard policy/security/lifecycle gates pass; LLM quality scoring is used only where the approved rubric asks for judgement; context.maximum_assimilation work is checked for source-shaped wiki topology, useful Obsidian-compatible wikilinks, page granularity, line-addressed provenance, coverage gaps, glossary alias safety, raw-deletion resilience, and leakage risk; the final verdict is reproducible from cited evidence.",
21
+ "success.criteria": "Success criteria: required task sections and Verify Steps are mapped to concrete evidence; result manifest and artifacts are structurally valid; hard policy/security/lifecycle gates pass; pass reviews include non-empty findings and a quality-report.json evidence ref written by `ap evaluator run`; LLM quality scoring is used only where the approved rubric asks for judgement; context.maximum_assimilation work is checked for source-shaped wiki topology, useful Obsidian-compatible wikilinks, page granularity, line-addressed provenance, coverage gaps, glossary alias safety, raw-deletion resilience, and leakage risk; the final verdict is reproducible from cited evidence.",
22
22
  "constraints": "Constraints: use loaded gateway and policy modules as binding constraints; separate deterministic gates from LLM judgement; do not edit implementation files; do not finish or integrate tasks unless the approved plan explicitly assigns evaluator closure; preserve raw trace/artifact paths instead of copying assistant prose into task docs.",
23
- "stop.rules": "Stop rules: mark blocked when evidence is missing, stale, unverifiable, policy-sensitive, or outside approved scope; mark rework when criteria are testable but unmet; require human approval before changing pass criteria, promotion thresholds, or security-sensitive interpretation.",
24
- "output": "Output: verdict, failed or satisfied criteria, evidence paths, LLM judgement summary when used, rework context for the next runner pass, and finish/promote recommendation."
23
+ "stop.rules": "Stop rules: mark blocked when evidence is missing, stale, unverifiable, policy-sensitive, or outside approved scope, or when it cannot be tied to the reviewed commit; mark rework when criteria are testable but unmet; require human approval before changing pass criteria, promotion thresholds, or security-sensitive interpretation.",
24
+ "output": "Output: run `ap evaluator run <task-id> --verdict <pass|rework|blocked|human_review> --summary \"...\" --finding \"...\" --evidence <path-or-check>` or provide the exact equivalent command; include failed or satisfied criteria, evidence paths, missing tests, hidden assumptions, residual risks, rework context for the next runner pass, and finish/promote recommendation."
25
25
  }
26
26
  }
@@ -17,15 +17,18 @@ Use AgentPlane through its CLI instead of editing `.agentplane/` state directly.
17
17
 
18
18
  1. If the repository is not initialized, run `ap init` or `agentplane init`.
19
19
  2. Run `ap quickstart`.
20
- 3. Inspect `AGENTS.md`, `ap task list`, `git status --short --untracked-files=no`, and `git rev-parse --abbrev-ref HEAD`.
21
- 4. Use `ap role ORCHESTRATOR` while planning and approvals are active.
22
- 5. Switch to `ap role <ROLE>` before owner-scoped execution or verification.
20
+ 3. Inspect `AGENTS.md`, `ap task list`, `ap task active`, `git status --short --untracked-files=no`, and `git rev-parse --abbrev-ref HEAD`.
21
+ 4. Use `ap task brief <task-id>` before owner-scoped execution; add `--remote` only when hosted PR/check/review state is needed.
22
+ 5. Use `ap role ORCHESTRATOR` while planning and approvals are active.
23
+ 6. Switch to `ap role <ROLE>` before owner-scoped execution or verification.
23
24
 
24
25
  ## Rules
25
26
 
26
27
  - Treat `AGENTS.md`, `ap quickstart`, and `ap role <ROLE>` as the policy surface.
27
28
  - Use `ap task ...`, `ap work ...`, `ap verify ...`, and `ap finish ...`; do not edit `.agentplane/tasks.json` manually.
28
- - In `branch_pr`, start from `ap work start <task-id> --agent <ROLE> --slug <slug> --worktree`.
29
+ - Prefer `ap task brief <task-id>` and `ap task next-action <task-id> --explain` over manually combining task docs, route status, Verify Steps, PR metadata, and policy notes.
30
+ - In `branch_pr`, use the concrete route command emitted by `task brief` or `task next-action` when available; fall back to `ap work start <task-id> --agent <ROLE> --slug <slug> --worktree` only as the low-level command contract.
31
+ - Treat weak `source_confidence` or non-ready `verify_steps_quality` as a context gap to resolve before mutation.
29
32
  - Keep repository artifacts in English unless the user explicitly requests another language for a specific artifact.
30
33
  - Record verification evidence in the task README and through `ap verify`.
31
34
 
@@ -29,15 +29,21 @@ Use this evaluator only when the primary implementation path already produced a
29
29
  4. Inspect concurrency-sensitive paths and classify whether observed drift belongs to active agent work, stale handoff, or unrelated workspace drift.
30
30
  5. Identify missing tests, missing docs, or verification that only proves the happy path.
31
31
  6. Do not execute fixes. Return review findings only.
32
+ 7. When recording the result, use `agentplane evaluator run <task-id>` so the task gets its prompt,
33
+ `quality-report.json`, and opinion artifacts. A bare `verify --by EVALUATOR` note is legacy
34
+ evidence and is not sufficient for finish/integrate gates.
32
35
 
33
36
  ## Output
34
37
 
35
38
  Return a concise structured review:
36
39
 
37
- - `verdict`: `pass`, `rework`, or `blocked`.
40
+ - `verdict`: `pass`, `rework`, `blocked`, or `human_review`.
38
41
  - `findings`: ordered by severity, each with file/path evidence and the broken invariant.
42
+ - `evidence_refs`: concrete files, checks, PRs, traces, or reports inspected; pass reviews must
43
+ include the generated `quality-report.json`.
39
44
  - `missing_tests`: concrete tests or checks that would have caught the issue.
40
45
  - `hidden_assumptions`: assumptions the implementation relies on but did not prove.
46
+ - `residual_risks`: risks that remain open after the review.
41
47
  - `recovery_context`: what the next agent should know only if normal context is insufficient.
42
48
 
43
49
  ## Stop Rules
@@ -58,6 +58,8 @@ Default branch names are `task/<task-id>/<slug>` for implementation branches and
58
58
  through `branch.task_prefix` and `branch.task_close_prefix`; task id, slug, and sha positions remain
59
59
  fixed.
60
60
 
61
+ Before manually filling `<slug>` or `<branch>`, use `agentplane task brief <task-id>` or `agentplane task next-action <task-id> --explain` and prefer the emitted concrete command.
62
+
61
63
  <!-- /ap:fragment -->
62
64
  <!-- ap:fragment id="policy.workflow.branch_pr.hard_constraint.constraints" slot="hard_constraint" mutability="append_only" -->
63
65
 
package/bin/agentplane.js CHANGED
@@ -415,9 +415,15 @@ async function assertDistUpToDate() {
415
415
  });
416
416
  }
417
417
 
418
+ const freshnessResults = await Promise.all(
419
+ checks.map(async (check) => ({
420
+ check,
421
+ result: await isPackageBuildFresh(check.root, { watchedPaths: check.watchedPaths }),
422
+ })),
423
+ );
424
+
418
425
  const staleReasons = [];
419
- for (const check of checks) {
420
- const result = await isPackageBuildFresh(check.root, { watchedPaths: check.watchedPaths });
426
+ for (const { check, result } of freshnessResults) {
421
427
  if (!result.ok) {
422
428
  const detail =
423
429
  Array.isArray(result.changedPaths) && result.changedPaths.length > 0
package/bin/dist-guard.js CHANGED
@@ -32,7 +32,13 @@ async function readJsonIfExists(p) {
32
32
 
33
33
  function resolveGitHead(cwd) {
34
34
  try {
35
- return execFileSync("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf8" }).trim() || null;
35
+ return (
36
+ execFileSync("git", ["rev-parse", "HEAD"], {
37
+ cwd,
38
+ encoding: "utf8",
39
+ stdio: ["ignore", "pipe", "ignore"],
40
+ }).trim() || null
41
+ );
36
42
  } catch {
37
43
  return null;
38
44
  }
@@ -41,7 +47,11 @@ function resolveGitHead(cwd) {
41
47
  function listGitPaths(cwd, args, options = {}) {
42
48
  const trimLines = options.trimLines ?? true;
43
49
  try {
44
- const out = execFileSync("git", args, { cwd, encoding: "utf8" });
50
+ const out = execFileSync("git", args, {
51
+ cwd,
52
+ encoding: "utf8",
53
+ stdio: ["ignore", "pipe", "ignore"],
54
+ });
45
55
  return out
46
56
  .split(/\r?\n/u)
47
57
  .map((line) => (trimLines ? line.trim() : line))
@@ -51,6 +61,26 @@ function listGitPaths(cwd, args, options = {}) {
51
61
  }
52
62
  }
53
63
 
64
+ function tryListGitPaths(cwd, args, options = {}) {
65
+ const trimLines = options.trimLines ?? true;
66
+ try {
67
+ const out = execFileSync("git", args, {
68
+ cwd,
69
+ encoding: "utf8",
70
+ stdio: ["ignore", "pipe", "ignore"],
71
+ });
72
+ return {
73
+ ok: true,
74
+ paths: out
75
+ .split(/\r?\n/u)
76
+ .map((line) => (trimLines ? line.trim() : line))
77
+ .filter(Boolean),
78
+ };
79
+ } catch {
80
+ return { ok: false, paths: [] };
81
+ }
82
+ }
83
+
54
84
  function uniqueSorted(values) {
55
85
  return [...new Set(values)].toSorted((a, b) => a.localeCompare(b));
56
86
  }
@@ -71,6 +101,25 @@ function workingTreeChangedPaths(cwd, watchedPaths) {
71
101
  );
72
102
  }
73
103
 
104
+ function tryWorkingTreeChangedPaths(cwd, watchedPaths) {
105
+ const result = tryListGitPaths(
106
+ cwd,
107
+ ["status", "--porcelain", "--untracked-files=all", "--", ...watchedPaths],
108
+ { trimLines: false },
109
+ );
110
+ return {
111
+ ok: result.ok,
112
+ paths: uniqueSorted(
113
+ result.paths
114
+ .map((line) => {
115
+ const normalized = String(line ?? "");
116
+ return normalized.length > 3 ? normalized.slice(3).trim() : "";
117
+ })
118
+ .filter((filePath) => Boolean(filePath) && isRuntimeRelevantWatchedFile(filePath)),
119
+ ),
120
+ };
121
+ }
122
+
74
123
  function committedChangedPathsSince(cwd, fromGitHead, watchedPaths) {
75
124
  if (!fromGitHead) return [];
76
125
  return uniqueSorted(
@@ -84,6 +133,21 @@ function committedChangedPathsSince(cwd, fromGitHead, watchedPaths) {
84
133
  );
85
134
  }
86
135
 
136
+ function tryCommittedChangedPathsSince(cwd, fromGitHead, watchedPaths) {
137
+ if (!fromGitHead) return { ok: false, paths: [] };
138
+ const result = tryListGitPaths(cwd, [
139
+ "diff",
140
+ "--name-only",
141
+ `${fromGitHead}..HEAD`,
142
+ "--",
143
+ ...watchedPaths,
144
+ ]);
145
+ return {
146
+ ok: result.ok,
147
+ paths: uniqueSorted(result.paths.filter((filePath) => isRuntimeRelevantWatchedFile(filePath))),
148
+ };
149
+ }
150
+
87
151
  async function fileMtimeMs(p) {
88
152
  try {
89
153
  const s = await stat(p);
@@ -144,6 +208,26 @@ export async function isPackageBuildFresh(packageRoot, options = {}) {
144
208
  const currentHead = resolveGitHead(packageRoot);
145
209
  const manifestSnapshot = parseManifestSnapshot(manifest);
146
210
  if (manifestSnapshot) {
211
+ const committedQuickCheck = tryCommittedChangedPathsSince(
212
+ packageRoot,
213
+ manifest.git_head,
214
+ manifestSnapshot.watchedPaths,
215
+ );
216
+ const workingTreeQuickCheck = tryWorkingTreeChangedPaths(
217
+ packageRoot,
218
+ manifestSnapshot.watchedPaths,
219
+ );
220
+ const changedPaths = uniqueSorted([
221
+ ...committedQuickCheck.paths,
222
+ ...workingTreeQuickCheck.paths,
223
+ ]);
224
+ if (committedQuickCheck.ok && workingTreeQuickCheck.ok && changedPaths.length === 0) {
225
+ if (manifest.git_head && currentHead && manifest.git_head !== currentHead) {
226
+ return { ok: true, reason: "fresh_after_git_quick_check", changedPaths: [] };
227
+ }
228
+ return { ok: true, reason: "fresh", changedPaths: [] };
229
+ }
230
+
147
231
  const currentSnapshot = await collectWatchedRuntimeSnapshot(
148
232
  packageRoot,
149
233
  manifestSnapshot.watchedPaths,
@@ -2,7 +2,7 @@
2
2
  "schema_version": 1,
3
3
  "manifest_kind": "package",
4
4
  "package_name": "agentplane",
5
- "package_version": "0.6.8",
6
- "git_head": "da065aae65278348dc3e137e897e3b7f126f0865",
7
- "watched_runtime_snapshot_hash": "854198c9b26559d30ced503a59717dfa3eb6f307c2ee4280a4430195a2b45eb5"
5
+ "package_version": "0.6.10",
6
+ "git_head": "e50e620a0716c40cfa3710a3af33161a760cf7a9",
7
+ "watched_runtime_snapshot_hash": "1ad5c76d8b356c40184cb982a599adcdaedbf66e7fb1209a0dbbf294c0d2dc9e"
8
8
  }