@xn-intenton-z2a/agentic-lib 7.2.4 → 7.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@
6
6
  // asks the Copilot SDK to choose multiple concurrent actions, then dispatches them.
7
7
 
8
8
  import * as core from "@actions/core";
9
- import { existsSync, readFileSync, writeFileSync } from "fs";
9
+ import { existsSync, readFileSync, readdirSync, statSync } from "fs";
10
10
  import { runCopilotTask, readOptionalFile, scanDirectory, filterIssues } from "../copilot.js";
11
11
 
12
12
  /**
@@ -305,6 +305,61 @@ async function gatherContext(octokit, repo, config, t) {
305
305
  }
306
306
  } catch { /* ignore */ }
307
307
 
308
+ // Check for dedicated test files (not just seed tests)
309
+ // A dedicated test imports from the source directory (src/lib/) rather than being a seed test
310
+ let hasDedicatedTests = false;
311
+ let dedicatedTestFiles = [];
312
+ try {
313
+ const testDirs = ["tests", "__tests__"];
314
+ for (const dir of testDirs) {
315
+ if (existsSync(dir)) {
316
+ const testFiles = scanDirectory(dir, [".js", ".ts", ".mjs"], { limit: 20 });
317
+ for (const tf of testFiles) {
318
+ // Skip seed test files (main.test.js, web.test.js, behaviour.test.js)
319
+ if (/^(main|web|behaviour)\.test\.[jt]s$/.test(tf.name)) continue;
320
+ const content = readFileSync(tf.path, "utf8");
321
+ // Check if it imports from src/lib/ (mission-specific code)
322
+ if (/from\s+['"].*src\/lib\//.test(content) || /require\s*\(\s*['"].*src\/lib\//.test(content)) {
323
+ hasDedicatedTests = true;
324
+ dedicatedTestFiles.push(tf.name);
325
+ }
326
+ }
327
+ }
328
+ }
329
+ } catch { /* ignore */ }
330
+
331
+ // W9: Count TODO comments in source directory
332
+ let sourceTodoCount = 0;
333
+ try {
334
+ const sourcePath = config.paths.source?.path || "src/lib/";
335
+ const sourceDir = sourcePath.endsWith("/") ? sourcePath.slice(0, -1) : sourcePath;
336
+ const srcRoot = sourceDir.includes("/") ? sourceDir.split("/").slice(0, -1).join("/") || "src" : "src";
337
+ // Inline recursive TODO counter (avoids circular import with index.js)
338
+ const countTodos = (dir) => {
339
+ let n = 0;
340
+ if (!existsSync(dir)) return 0;
341
+ try {
342
+ const entries = readdirSync(dir);
343
+ for (const entry of entries) {
344
+ if (entry === "node_modules" || entry.startsWith(".")) continue;
345
+ const fp = `${dir}/${entry}`;
346
+ try {
347
+ const stat = statSync(fp);
348
+ if (stat.isDirectory()) {
349
+ n += countTodos(fp);
350
+ } else if (/\.(js|ts|mjs)$/.test(entry)) {
351
+ const content = readFileSync(fp, "utf8");
352
+ const m = content.match(/\bTODO\b/gi);
353
+ if (m) n += m.length;
354
+ }
355
+ } catch { /* skip */ }
356
+ }
357
+ } catch { /* skip */ }
358
+ return n;
359
+ };
360
+ sourceTodoCount = countTodos(srcRoot);
361
+ } catch { /* ignore */ }
362
+
308
363
  return {
309
364
  mission,
310
365
  recentActivity,
@@ -333,10 +388,13 @@ async function gatherContext(octokit, repo, config, t) {
333
388
  cumulativeTransformationCost,
334
389
  recentlyClosedSummary,
335
390
  sourceExports,
391
+ hasDedicatedTests,
392
+ dedicatedTestFiles,
393
+ sourceTodoCount,
336
394
  };
337
395
  }
338
396
 
339
- function buildPrompt(ctx, agentInstructions) {
397
+ function buildPrompt(ctx, agentInstructions, config) {
340
398
  return [
341
399
  "## Instructions",
342
400
  agentInstructions,
@@ -368,6 +426,39 @@ function buildPrompt(ctx, agentInstructions) {
368
426
  "",
369
427
  ]
370
428
  : []),
429
+ `### Test Coverage`,
430
+ ctx.hasDedicatedTests
431
+ ? `Dedicated test files: ${ctx.dedicatedTestFiles.join(", ")}`
432
+ : "**No dedicated test files found.** Only seed tests (main.test.js, web.test.js) exist. Mission-complete requires dedicated tests that import from src/lib/.",
433
+ "",
434
+ `### Source TODO Count: ${ctx.sourceTodoCount}`,
435
+ ctx.sourceTodoCount > 0
436
+ ? `**${ctx.sourceTodoCount} TODO(s) found in source.** All TODOs must be resolved before mission-complete can be declared.`
437
+ : "No TODOs found in source — this criterion is met.",
438
+ "",
439
+ ...(() => {
440
+ // W10: Build mission-complete metrics inline for the LLM
441
+ const thresholds = config?.missionCompleteThresholds || {};
442
+ const minResolved = thresholds.minResolvedIssues ?? 3;
443
+ const requireTests = thresholds.requireDedicatedTests ?? true;
444
+ const maxTodos = thresholds.maxSourceTodos ?? 0;
445
+ const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
446
+ const rows = [
447
+ `### Mission-Complete Metrics`,
448
+ "| Metric | Value | Target | Status |",
449
+ "|--------|-------|--------|--------|",
450
+ `| Open issues | ${ctx.issuesSummary.length} | 0 | ${ctx.issuesSummary.length === 0 ? "MET" : "NOT MET"} |`,
451
+ `| Open PRs | ${ctx.prsSummary.length} | 0 | ${ctx.prsSummary.length === 0 ? "MET" : "NOT MET"} |`,
452
+ `| Issues resolved (RESOLVED) | ${resolvedCount} | >= ${minResolved} | ${resolvedCount >= minResolved ? "MET" : "NOT MET"} |`,
453
+ `| Dedicated test files | ${ctx.hasDedicatedTests ? "YES" : "NO"} | ${requireTests ? "YES" : "—"} | ${!requireTests || ctx.hasDedicatedTests ? "MET" : "NOT MET"} |`,
454
+ `| Source TODO count | ${ctx.sourceTodoCount} | <= ${maxTodos} | ${ctx.sourceTodoCount <= maxTodos ? "MET" : "NOT MET"} |`,
455
+ `| Budget used | ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} | < ${ctx.transformationBudget || "unlimited"} | ${ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget ? "EXHAUSTED" : "OK"} |`,
456
+ "",
457
+ "**All metrics must show MET/OK for mission-complete to be declared.**",
458
+ "",
459
+ ];
460
+ return rows;
461
+ })(),
371
462
  `### Recent Workflow Runs`,
372
463
  ctx.workflowsSummary.join("\n") || "none",
373
464
  "",
@@ -419,7 +510,7 @@ function buildPrompt(ctx, agentInstructions) {
419
510
  ]
420
511
  : []),
421
512
  ...(ctx.transformationBudget > 0
422
- ? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, ""]
513
+ ? [`### Transformation Budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget} used (${Math.max(0, ctx.transformationBudget - ctx.cumulativeTransformationCost)} remaining)`, "Note: instability transforms (infrastructure fixes) do not count against this budget.", ""]
423
514
  : []),
424
515
  `### Issue Limits`,
425
516
  `Feature development WIP limit: ${ctx.featureIssuesWipLimit}`,
@@ -444,10 +535,6 @@ function buildPrompt(ctx, agentInstructions) {
444
535
  "### Communication",
445
536
  "- `respond:discussions | message: <text> | discussion-url: <url>` — Reply via discussions bot",
446
537
  "",
447
- "### Mission Lifecycle",
448
- "- `mission-complete | reason: <text>` — Declare mission accomplished. Writes MISSION_COMPLETE.md and sets schedule to off. Use when: all acceptance criteria in MISSION.md are satisfied, tests pass, and recently-closed issues confirm resolution.",
449
- "- `mission-failed | reason: <text>` — Declare mission failed. Writes MISSION_FAILED.md and sets schedule to off. Use when: transformation budget is exhausted with no progress, pipeline is stuck in a loop, or the mission is unachievable.",
450
- "",
451
538
  "### Schedule Control",
452
539
  "- `set-schedule:<frequency>` — Change supervisor schedule (off, weekly, daily, hourly, continuous). Use `set-schedule:weekly` when mission is substantially complete, `set-schedule:continuous` to ramp up.",
453
540
  "",
@@ -605,152 +692,11 @@ async function executeRespondDiscussions(octokit, repo, params, ctx) {
605
692
  return "skipped:respond-no-message";
606
693
  }
607
694
 
608
- async function executeMissionComplete(octokit, repo, params, ctx) {
609
- const reason = params.reason || "All acceptance criteria satisfied";
610
- const signal = [
611
- "# Mission Complete",
612
- "",
613
- `- **Timestamp:** ${new Date().toISOString()}`,
614
- `- **Detected by:** supervisor`,
615
- `- **Reason:** ${reason}`,
616
- "",
617
- "This file was created automatically. To restart transformations, delete this file or run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
618
- ].join("\n");
619
- writeFileSync("MISSION_COMPLETE.md", signal);
620
- core.info(`Mission complete signal written: ${reason}`);
621
-
622
- // Persist MISSION_COMPLETE.md to the repository via Contents API so it survives across runs
623
- try {
624
- const contentBase64 = Buffer.from(signal).toString("base64");
625
- // Check if file already exists (to get its SHA for updates)
626
- let existingSha;
627
- try {
628
- const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_COMPLETE.md", ref: "main" });
629
- existingSha = data.sha;
630
- } catch {
631
- // File doesn't exist yet — that's fine
632
- }
633
- await octokit.rest.repos.createOrUpdateFileContents({
634
- ...repo,
635
- path: "MISSION_COMPLETE.md",
636
- message: "mission-complete: " + reason.substring(0, 72),
637
- content: contentBase64,
638
- branch: "main",
639
- ...(existingSha ? { sha: existingSha } : {}),
640
- });
641
- core.info("MISSION_COMPLETE.md committed to main via Contents API");
642
- } catch (err) {
643
- core.warning(`Could not commit MISSION_COMPLETE.md to repo: ${err.message}`);
644
- }
645
-
646
- if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
647
- // Only turn off schedule if it's not already off or in maintenance mode
648
- let currentSupervisor = "";
649
- try {
650
- const tomlContent = readFileSync("agentic-lib.toml", "utf8");
651
- const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
652
- if (match) currentSupervisor = match[1];
653
- } catch { /* ignore */ }
654
-
655
- if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
656
- core.info(`Schedule already "${currentSupervisor}" — not changing on mission-complete`);
657
- } else {
658
- try {
659
- await octokit.rest.actions.createWorkflowDispatch({
660
- ...repo,
661
- workflow_id: "agentic-lib-schedule.yml",
662
- ref: "main",
663
- inputs: { frequency: "off" },
664
- });
665
- } catch (err) {
666
- core.warning(`Could not set schedule to off: ${err.message}`);
667
- }
668
- }
669
-
670
- // Announce mission complete via bot
671
- const websiteUrl = getWebsiteUrl(repo);
672
- const discussionUrl = ctx?.activeDiscussionUrl || "";
673
- await dispatchBot(octokit, repo, `Mission complete! ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
674
- }
675
- return `mission-complete:${reason.substring(0, 100)}`;
676
- }
677
-
678
- async function executeMissionFailed(octokit, repo, params, ctx) {
679
- const reason = params.reason || "Mission could not be completed";
680
- const signal = [
681
- "# Mission Failed",
682
- "",
683
- `- **Timestamp:** ${new Date().toISOString()}`,
684
- `- **Detected by:** supervisor`,
685
- `- **Reason:** ${reason}`,
686
- "",
687
- "This file was created automatically. To restart, delete this file and run `npx @xn-intenton-z2a/agentic-lib init --reseed`.",
688
- ].join("\n");
689
- writeFileSync("MISSION_FAILED.md", signal);
690
- core.info(`Mission failed signal written: ${reason}`);
691
-
692
- // Persist MISSION_FAILED.md to the repository via Contents API so it survives across runs
693
- try {
694
- const contentBase64 = Buffer.from(signal).toString("base64");
695
- let existingSha;
696
- try {
697
- const { data } = await octokit.rest.repos.getContent({ ...repo, path: "MISSION_FAILED.md", ref: "main" });
698
- existingSha = data.sha;
699
- } catch {
700
- // File doesn't exist yet — that's fine
701
- }
702
- await octokit.rest.repos.createOrUpdateFileContents({
703
- ...repo,
704
- path: "MISSION_FAILED.md",
705
- message: "mission-failed: " + reason.substring(0, 72),
706
- content: contentBase64,
707
- branch: "main",
708
- ...(existingSha ? { sha: existingSha } : {}),
709
- });
710
- core.info("MISSION_FAILED.md committed to main via Contents API");
711
- } catch (err) {
712
- core.warning(`Could not commit MISSION_FAILED.md to repo: ${err.message}`);
713
- }
714
-
715
- if (process.env.GITHUB_REPOSITORY !== "xn-intenton-z2a/agentic-lib") {
716
- // Only turn off schedule if it's not already off or in maintenance mode
717
- let currentSupervisor = "";
718
- try {
719
- const tomlContent = readFileSync("agentic-lib.toml", "utf8");
720
- const match = tomlContent.match(/^\s*supervisor\s*=\s*"([^"]*)"/m);
721
- if (match) currentSupervisor = match[1];
722
- } catch { /* ignore */ }
723
-
724
- if (currentSupervisor === "off" || currentSupervisor === "maintenance") {
725
- core.info(`Schedule already "${currentSupervisor}" — not changing on mission-failed`);
726
- } else {
727
- try {
728
- await octokit.rest.actions.createWorkflowDispatch({
729
- ...repo,
730
- workflow_id: "agentic-lib-schedule.yml",
731
- ref: "main",
732
- inputs: { frequency: "off" },
733
- });
734
- } catch (err) {
735
- core.warning(`Could not set schedule to off: ${err.message}`);
736
- }
737
- }
738
-
739
- // Announce mission failed via bot
740
- const websiteUrl = getWebsiteUrl(repo);
741
- const discussionUrl = ctx?.activeDiscussionUrl || "";
742
- await dispatchBot(octokit, repo, `Mission failed. ${reason}\n\nWebsite: ${websiteUrl}`, discussionUrl);
743
- }
744
- return `mission-failed:${reason.substring(0, 100)}`;
745
- }
746
-
747
695
  const ACTION_HANDLERS = {
748
696
  "github:create-issue": executeCreateIssue,
749
697
  "github:label-issue": executeLabelIssue,
750
698
  "github:close-issue": executeCloseIssue,
751
699
  "respond:discussions": executeRespondDiscussions,
752
- "mission-complete": executeMissionComplete,
753
- "mission-failed": executeMissionFailed,
754
700
  };
755
701
 
756
702
  async function executeSetSchedule(octokit, repo, frequency) {
@@ -778,7 +724,7 @@ async function executeAction(octokit, repo, action, params, ctx) {
778
724
  if (action === "nop") return "nop";
779
725
  const handler = ACTION_HANDLERS[action];
780
726
  if (handler) return handler(octokit, repo, params, ctx);
781
- core.warning(`Unknown action: ${action}`);
727
+ core.debug(`Ignoring unrecognised action: ${action}`);
782
728
  return `unknown:${action}`;
783
729
  }
784
730
 
@@ -814,7 +760,7 @@ export async function supervise(context) {
814
760
 
815
761
  // --- LLM decision ---
816
762
  const agentInstructions = instructions || "You are the supervisor. Decide what actions to take.";
817
- const prompt = buildPrompt(ctx, agentInstructions);
763
+ const prompt = buildPrompt(ctx, agentInstructions, config);
818
764
 
819
765
  const { content, tokensUsed, inputTokens, outputTokens, cost } = await runCopilotTask({
820
766
  model,
@@ -845,28 +791,8 @@ export async function supervise(context) {
845
791
 
846
792
  // --- Deterministic lifecycle posts (after LLM) ---
847
793
 
848
- // Strategy A: Deterministic mission-complete fallback
849
- // If the LLM didn't choose mission-complete but conditions are clearly met, auto-execute it.
850
- // Skip in maintenance mode — maintenance keeps running regardless of mission status.
851
- if (!ctx.missionComplete && !ctx.missionFailed && config.supervisor !== "maintenance") {
852
- const llmChoseMissionComplete = results.some((r) => r.startsWith("mission-complete:"));
853
- if (!llmChoseMissionComplete) {
854
- const resolvedCount = ctx.recentlyClosedSummary.filter((s) => s.includes("RESOLVED")).length;
855
- const hasNoOpenIssues = ctx.issuesSummary.length === 0;
856
- const hasNoOpenPRs = ctx.prsSummary.length === 0;
857
- if (hasNoOpenIssues && hasNoOpenPRs && resolvedCount >= 1) {
858
- core.info(`Deterministic mission-complete: 0 open issues, 0 open PRs, ${resolvedCount} recently resolved — LLM did not detect completion`);
859
- try {
860
- const autoResult = await executeMissionComplete(octokit, repo,
861
- { reason: `All acceptance criteria satisfied (${resolvedCount} issues resolved, 0 open issues, 0 open PRs)` },
862
- ctx);
863
- results.push(autoResult);
864
- } catch (err) {
865
- core.warning(`Deterministic mission-complete failed: ${err.message}`);
866
- }
867
- }
868
- }
869
- }
794
+ // W12: Mission-complete/failed evaluation moved to the director task.
795
+ // The supervisor no longer declares mission-complete or mission-failed.
870
796
 
871
797
  // Step 3: Auto-respond when a message referral is present
872
798
  // If the workflow was triggered with a message (from bot's request-supervisor),
@@ -888,14 +814,8 @@ export async function supervise(context) {
888
814
 
889
815
  // Build changes list from executed actions
890
816
  const changes = results
891
- .filter((r) => r.startsWith("created-issue:") || r.startsWith("mission-complete:") || r.startsWith("mission-failed:"))
892
- .map((r) => {
893
- if (r.startsWith("created-issue:")) return { action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" };
894
- if (r.startsWith("mission-complete:")) return { action: "mission-complete", file: "MISSION_COMPLETE.md", sizeInfo: r.replace("mission-complete:", "") };
895
- if (r.startsWith("mission-failed:")) return { action: "mission-failed", file: "MISSION_FAILED.md", sizeInfo: r.replace("mission-failed:", "") };
896
- return null;
897
- })
898
- .filter(Boolean);
817
+ .filter((r) => r.startsWith("created-issue:"))
818
+ .map((r) => ({ action: "created-issue", file: r.replace("created-issue:", ""), sizeInfo: "" }));
899
819
 
900
820
  return {
901
821
  outcome: actions.length === 0 ? "nop" : `supervised:${actions.length}-actions`,
@@ -0,0 +1,58 @@
1
+ You are the director of an autonomous coding repository. Your sole responsibility is evaluating whether the mission is complete, failed, or in progress.
2
+
3
+ ## Your Role
4
+
5
+ You do NOT dispatch workflows, create issues, or manage the schedule. That is the supervisor's job. You ONLY assess mission status and produce a structured evaluation.
6
+
7
+ ## Input
8
+
9
+ You receive:
10
+ 1. **MISSION.md** — the acceptance criteria
11
+ 2. **Mission-Complete Metrics** — a table of mechanical checks (open issues, PRs, resolved count, test coverage, TODO count, budget)
12
+ 3. **Metric based mission complete assessment** — a pre-computed advisory from the mechanical check
13
+ 4. **Source Exports** — functions exported from source files
14
+ 5. **Recently Closed Issues** — issues resolved since init
15
+ 6. **Recent Activity** — the latest entries from the activity log
16
+
17
+ ## Decision Framework
18
+
19
+ ### Mission Complete
20
+ Declare `mission-complete` when ALL of the following are true:
21
+ 1. Every row in the Mission-Complete Metrics table shows **MET** or **OK**
22
+ 2. The Source Exports demonstrate that all functions required by MISSION.md are implemented
23
+ 3. The Recently Closed Issues confirm that acceptance criteria have been addressed
24
+ 4. No TODOs remain in source code
25
+ 5. Dedicated test files exist (not just seed tests)
26
+
27
+ ### Mission Failed
28
+ Declare `mission-failed` when ANY of the following are true:
29
+ 1. Transformation budget is EXHAUSTED and acceptance criteria are still unmet
30
+ 2. The last 3+ transforms produced no meaningful code changes
31
+ 3. The pipeline is stuck in a loop (same issues created and closed repeatedly)
32
+
33
+ ### Gap Analysis (most common output)
34
+ When the mission is neither complete nor failed, produce a detailed gap analysis:
35
+ - What has been achieved so far
36
+ - What specific gaps remain between the current state and mission-complete
37
+ - Which metrics are NOT MET and what needs to happen to satisfy them
38
+ - Prioritised list of what should be done next
39
+
40
+ ## Output Format
41
+
42
+ Respond with EXACTLY this structure:
43
+
44
+ ```
45
+ [DECISION]
46
+ mission-complete | mission-failed | in-progress
47
+ [/DECISION]
48
+ [REASON]
49
+ One-line summary of the decision.
50
+ [/REASON]
51
+ [ANALYSIS]
52
+ Detailed gap analysis or completion summary. Include:
53
+ - Metrics status (which are MET, which are NOT MET)
54
+ - What has been achieved
55
+ - What remains (if in-progress)
56
+ - Recommended next actions (if in-progress)
57
+ [/ANALYSIS]
58
+ ```
@@ -1,18 +1,24 @@
1
1
  You are the supervisor of an autonomous coding repository. Your job is to advance the mission by strategically choosing which workflows to dispatch and which GitHub actions to take.
2
2
 
3
- ## MANDATORY FIRST CHECK: Is the Mission Already Complete?
3
+ **Important:** You do NOT evaluate mission-complete or mission-failed. That is the director's exclusive responsibility. Focus on advancing the mission through strategic action.
4
4
 
5
- **Before choosing ANY action, evaluate this:**
5
+ ## MANDATORY FIRST CHECK: What Needs to Happen Next?
6
6
 
7
- 1. Are there 0 open issues?
8
- 2. Were 2+ recently-closed issues "closed by review as RESOLVED"?
9
- 3. Do the Source Exports show the functions required by MISSION.md?
7
+ **Before choosing ANY action, check the Mission-Complete Metrics table in the prompt.**
10
8
 
11
- If ALL three are true → the mission is done. Choose `mission-complete | reason: <summary>`. Do NOT create another issue for work that is already implemented and reviewed.
9
+ Look at which metrics are NOT MET — these tell you what gaps remain:
10
+ 1. Open issues > 0 → close resolved issues or wait for review
11
+ 2. Open PRs > 0 → merge or close stale PRs
12
+ 3. Issues resolved < threshold → create and resolve more issues
13
+ 4. Dedicated test files = NO → create an issue requesting dedicated tests
14
+ 5. Source TODO count > 0 → create an issue to resolve TODOs
15
+ 6. Budget near exhaustion → be strategic with remaining transforms
16
+
17
+ If all metrics show MET/OK, use `nop` — the director will handle the rest.
12
18
 
13
19
  ## Priority Order
14
20
 
15
- 1. **Always strive for mission complete** — every action you take should aim to finish the mission. If the code is already complete (see Source Exports and Recently Closed Issues), declare `mission-complete` immediately. Otherwise, create one comprehensive issue that targets the entire mission (all acceptance criteria, tests, website, docs, README). Only create a second issue if the first transform couldn't complete everything, and scope it to the remaining work. Do not create issues just to fill a quota.
21
+ 1. **Always strive to close gaps** — every action you take should aim to satisfy the remaining NOT MET metrics. If the code is already complete (see Source Exports and Recently Closed Issues), use `nop` and let the director evaluate. Otherwise, create one comprehensive issue that targets the entire mission (all acceptance criteria, tests, website, docs, README). Only create a second issue if the first transform couldn't complete everything, and scope it to the remaining work. Do not create issues just to fill a quota.
16
22
  2. **Dispatch transform when ready issues exist** — transform is where code gets written. Always prefer it over maintain when there are open issues with the `ready` label.
17
23
  3. **Dispatch review after transform** — when recent workflow runs show a transform completion, dispatch review to close resolved issues and add `ready` labels to new issues. This keeps the pipeline flowing.
18
24
  4. **Fix failing PRs** — dispatch fix-code for any PR with failing checks (include pr-number).
@@ -36,10 +42,8 @@ If ALL three are true → the mission is done. Choose `mission-complete | reason
36
42
  - **github:label-issue** — When an issue needs better categorisation for prioritisation.
37
43
  - **github:close-issue** — When an issue is clearly resolved or no longer relevant.
38
44
  - **respond:discussions** — When replying to a user request that came through the discussions bot. Include the discussion URL and a clear message.
39
- - **set-schedule:\<frequency\>** — Change the workflow schedule. Use `weekly` when mission is substantially achieved, `continuous` to ramp up for active development.
40
- - **mission-complete** — When all MISSION.md acceptance criteria are verified as satisfied. Review the Recently Closed Issues — if the last 2+ issues were closed by review as RESOLVED, 0 open issues remain, and the acceptance criteria in MISSION.md match the implemented code, declare mission complete. This writes MISSION_COMPLETE.md and sets the schedule to off. Always include a reason summarising what was achieved.
41
- - **mission-failed** — When the mission cannot be completed. Use when: transformation budget is exhausted with acceptance criteria still unmet, the pipeline is stuck in a create-close loop with no code changes, or 3+ consecutive transforms failed to produce working code. This writes MISSION_FAILED.md and sets the schedule to off. Always include a reason explaining what went wrong.
42
- - **nop** — When everything is running optimally: transform is active, issues are flowing, no failures.
45
+ - **set-schedule:\<frequency\>** — Change the workflow schedule. Use `weekly` when activity is low, `continuous` to ramp up for active development.
46
+ - **nop** — When everything is running optimally: transform is active, issues are flowing, no failures. Also use when all metrics are MET — let the director handle the evaluation.
43
47
 
44
48
  ## Stale Issue Detection
45
49
 
@@ -52,43 +56,11 @@ When recent workflow runs show an init completion, the repository has a fresh or
52
56
  Dispatch the discussions bot to announce the new mission to the community.
53
57
  Include the website URL in the announcement — the site is at `https://<owner>.github.io/<repo>/` and runs the library.
54
58
 
55
- ### Mission Accomplished (bounded missions)
56
- When ALL of the following conditions are met, the mission is accomplished:
57
- 1. All open issues are closed (check Recently Closed Issues — if the last 2+ were closed by review as RESOLVED, this is strong evidence)
58
- 2. Tests pass (CI gates commits, so this is usually the case)
59
- 3. The MISSION.md acceptance criteria are all satisfied (verify each criterion against the Recently Closed Issues and Recent Activity)
60
- 4. Do not create an issue if a similar issue was recently closed as resolved — check the Recently Closed Issues section
61
-
62
- When all conditions are met, use the `mission-complete` action:
63
- 1. `mission-complete | reason: <summary of what was achieved>` — this writes MISSION_COMPLETE.md and sets the schedule to off
64
- 2. `dispatch:agentic-lib-bot` — announce mission accomplished in the discussions thread. Include the website URL (`https://<owner>.github.io/<repo>/`) where users can see the finished product.
65
-
66
- Do NOT create another issue when the mission is already accomplished. If the Recently Closed Issues show 2+ issues closed by review as RESOLVED and 0 open issues remain, the mission is done.
67
-
68
59
  ### Ongoing Missions
69
60
  If MISSION.md explicitly says "do not set schedule to off" or "ongoing mission", the mission never completes.
70
61
  Instead, when activity is healthy, use `set-schedule:weekly` or `set-schedule:daily` to keep the pipeline running.
71
62
  Never use `set-schedule:off` for ongoing missions.
72
63
 
73
- ### Mission Substantially Complete (bounded, but minor gaps)
74
- When the transform agent has implemented all major features but minor polish remains
75
- (e.g. missing README examples, incomplete edge case coverage):
76
- 1. `dispatch:agentic-lib-bot` — announce near-completion in the discussions thread
77
- 2. `set-schedule:weekly` — reduce to weekly maintenance check-ins
78
- 3. Check that `docs/` contains evidence of the library working before declaring done
79
-
80
- ### Mission Failed
81
- When the mission cannot be completed, use the `mission-failed` action. Indicators of failure:
82
- 1. **Budget exhausted** — Transformation Budget shows usage at or near capacity with acceptance criteria still unmet
83
- 2. **Pipeline stuck** — 3+ consecutive supervisor cycles created issues that were immediately closed by review as RESOLVED, but the acceptance criteria are NOT actually met (false positives in review)
84
- 3. **No progress** — the last 3+ transforms produced no code changes (all nop outcomes) and acceptance criteria remain unmet
85
- 4. **Repeated failures** — transforms keep producing code that fails tests, and fix-code cannot resolve the failures
86
- 5. **Consuming budget without results** — transformation budget is being spent but the codebase is not converging toward the acceptance criteria
87
-
88
- When declaring mission failed:
89
- 1. `mission-failed | reason: <what went wrong and what was achieved>` — this writes MISSION_FAILED.md and sets the schedule to off
90
- 2. `dispatch:agentic-lib-bot` — announce the failure in the discussions thread with details of what was accomplished and what remains
91
-
92
64
  ## Prerequisites
93
65
 
94
66
  - The `set-schedule` action requires a `WORKFLOW_TOKEN` secret (classic PAT with `workflow` scope) to push workflow file changes to main.
@@ -97,13 +69,13 @@ When declaring mission failed:
97
69
 
98
70
  Check the Recent Activity log and Recently Closed Issues for patterns:
99
71
 
100
- **Mission complete signals:**
101
- - If the last 2+ issues were closed by review as RESOLVED, AND 0 open issues remain, the mission is likely accomplished. Verify against MISSION.md acceptance criteria, then use `mission-complete`.
102
- - If the last 2+ workflow runs produced no transform commits (only maintain-only or nop outcomes), AND all open issues are closed, follow the "Mission Accomplished" protocol.
72
+ **All metrics MET signals:**
73
+ - If all rows in the Mission-Complete Metrics table show MET/OK, use `nop` — the director will evaluate mission-complete.
74
+ - If the last 2+ workflow runs produced no transform commits (only maintain-only or nop outcomes), AND all open issues are closed, use `nop`.
103
75
 
104
- **Mission failed signals:**
105
- - If the Transformation Budget shows usage near capacity (e.g. 28/32) and acceptance criteria are still unmet, the mission is failing. Use `mission-failed`.
106
- - If the last 3+ cycles show the pattern: create issue → review closes as resolved → no transform → create identical issue, the pipeline is stuck. Check if acceptance criteria are truly met (use `mission-complete`) or if review is wrong (create a more specific issue). If neither works, use `mission-failed`.
76
+ **Budget exhaustion signals:**
77
+ - If the Transformation Budget shows usage near capacity (e.g. 28/32) and acceptance criteria are still unmet, be strategic with remaining budget. Create highly-targeted issues that address the most critical gaps.
78
+ - If the last 3+ cycles show the pattern: create issue → review closes as resolved → no transform → create identical issue, the pipeline is stuck. Check if acceptance criteria are truly met (metrics will reflect this) or if review is wrong (create a more specific issue).
107
79
  - Look for `transform: nop` or `transform: transformed` patterns in the activity log to distinguish productive iterations from idle ones.
108
80
 
109
81
  **Dedup deadlock recovery:**
@@ -115,7 +87,7 @@ Check the Recent Activity log for discussion bot referrals (lines containing `di
115
87
 
116
88
  Also check for notable progress worth reporting:
117
89
  - Mission milestones achieved (all core functions implemented, all tests passing)
118
- - Schedule changes (mission accomplished, throttling down)
90
+ - Schedule changes (throttling down)
119
91
  - Significant code changes (large PRs merged, new features completed)
120
92
  - Website first deployed or significantly updated (include the URL: `https://<owner>.github.io/<repo>/`)
121
93
 
@@ -3,12 +3,12 @@
3
3
  import { test, expect } from "@playwright/test";
4
4
 
5
5
  test("homepage returns 200 and renders", async ({ page }) => {
6
- const response = await page.goto("/");
6
+ const response = await page.goto("/", { waitUntil: "networkidle" });
7
7
  expect(response.status()).toBe(200);
8
8
 
9
- await expect(page.locator("#lib-name")).toBeVisible();
10
- await expect(page.locator("#lib-version")).toBeVisible();
11
- await expect(page.locator("#demo-output")).toBeVisible();
9
+ await expect(page.locator("#lib-name")).toBeVisible({ timeout: 10000 });
10
+ await expect(page.locator("#lib-version")).toBeVisible({ timeout: 10000 });
11
+ await expect(page.locator("#demo-output")).toBeVisible({ timeout: 10000 });
12
12
 
13
13
  await page.screenshot({ path: "SCREENSHOT_INDEX.png", fullPage: true });
14
14
  });
@@ -17,7 +17,7 @@
17
17
  "author": "",
18
18
  "license": "MIT",
19
19
  "dependencies": {
20
- "@xn-intenton-z2a/agentic-lib": "^7.2.4"
20
+ "@xn-intenton-z2a/agentic-lib": "^7.2.6"
21
21
  },
22
22
  "devDependencies": {
23
23
  "@playwright/test": "^1.58.0",
@@ -5,6 +5,7 @@ import { defineConfig } from "@playwright/test";
5
5
  export default defineConfig({
6
6
  testDir: "tests/behaviour",
7
7
  timeout: 30000,
8
+ retries: 2,
8
9
  use: {
9
10
  baseURL: "http://localhost:3000",
10
11
  },