@xn-intenton-z2a/agentic-lib 7.4.12 → 7.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/agents/agent-director.md +3 -0
- package/.github/agents/agent-implementation-review.md +63 -0
- package/.github/agents/agent-supervisor.md +11 -1
- package/.github/workflows/agentic-lib-workflow.yml +100 -48
- package/agentic-lib.toml +2 -2
- package/bin/agentic-lib.js +14 -1
- package/package.json +1 -1
- package/src/actions/agentic-step/index.js +32 -21
- package/src/actions/agentic-step/logging.js +81 -10
- package/src/actions/agentic-step/tasks/direct.js +39 -4
- package/src/actions/agentic-step/tasks/implementation-review.js +232 -0
- package/src/actions/agentic-step/tasks/supervise.js +34 -6
- package/src/actions/commit-if-changed/action.yml +2 -1
- package/src/copilot/config.js +1 -1
- package/src/copilot/guards.js +11 -5
- package/src/copilot/telemetry.js +29 -0
- package/src/seeds/zero-package.json +1 -1
|
@@ -27,6 +27,9 @@ Declare `mission-complete` when ALL of the following are true:
|
|
|
27
27
|
3. The Recently Closed Issues confirm that acceptance criteria have been addressed
|
|
28
28
|
4. No TODOs remain in source code
|
|
29
29
|
5. Dedicated test files exist (not just seed tests)
|
|
30
|
+
6. The Implementation Review shows no critical gaps (if review data is present)
|
|
31
|
+
|
|
32
|
+
**Important:** If the Implementation Review section is present in your prompt and identifies critical gaps — missing implementations, untested features, or misleading metrics — do NOT declare mission-complete even if other metrics are met. The review is ground-truth evidence; metrics can be misleading.
|
|
30
33
|
|
|
31
34
|
### Mission Failed
|
|
32
35
|
Declare `mission-failed` when ANY of the following are true:
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Trace mission elements through source code, tests, website, and behaviour tests to verify implementation completeness
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
You are an implementation review agent for an autonomous coding repository. Your job is to provide **ground-truth evidence** that the mission is actually implemented — not just that metrics say it is.
|
|
6
|
+
|
|
7
|
+
## Your Role
|
|
8
|
+
|
|
9
|
+
You do NOT write code, create issues, or dispatch workflows. You ONLY review and report. Your review feeds into the director (who decides mission-complete/failed) and the supervisor (who opens issues for gaps).
|
|
10
|
+
|
|
11
|
+
## Why This Matters
|
|
12
|
+
|
|
13
|
+
Metrics can be misleading:
|
|
14
|
+
- Issues closed in error look like "resolved" issues
|
|
15
|
+
- Trivial tests (empty assertions, tests that always pass) inflate test counts
|
|
16
|
+
- Features can be marked "done" in documentation yet be missing from the actual code
|
|
17
|
+
- Merged PRs may not actually implement what the issue requested
|
|
18
|
+
|
|
19
|
+
Your job is to look past the metrics and verify the actual state of the code.
|
|
20
|
+
|
|
21
|
+
## Review Process
|
|
22
|
+
|
|
23
|
+
### Step 1: Decompose the Mission
|
|
24
|
+
Read MISSION.md and break it into discrete deliverable elements. Each element should be a specific capability or feature that the mission requires.
|
|
25
|
+
|
|
26
|
+
### Step 2: Trace Each Element
|
|
27
|
+
For each mission element, search the codebase:
|
|
28
|
+
|
|
29
|
+
1. **Implementation** (`src/lib/`): Is there actual code that implements this? Look for functions, classes, or modules that provide the capability. Read the code to verify it's substantive, not just a stub.
|
|
30
|
+
|
|
31
|
+
2. **Unit Tests** (`tests/`): Are there test files that import from `src/lib/` and test this element? Read the tests to verify they make meaningful assertions — not just `expect(true).toBe(true)`.
|
|
32
|
+
|
|
33
|
+
3. **Behaviour Tests** (`tests/behaviour/` or Playwright tests): Are there end-to-end tests that exercise this element? Check that they interact with the actual feature, not just load a page.
|
|
34
|
+
|
|
35
|
+
4. **Website Usage** (`src/web/`, `docs/`): Does the website actually use this feature? Look for imports from `src/lib/` or API calls that surface the feature to users.
|
|
36
|
+
|
|
37
|
+
5. **Integration Path**: How does the website access the library? Direct import, script tag, API endpoint? Document the actual mechanism.
|
|
38
|
+
|
|
39
|
+
6. **Behaviour Coverage**: Do the behaviour tests verify that the website presents this specific feature? Check that Playwright tests assert on feature-specific content, not just generic page structure.
|
|
40
|
+
|
|
41
|
+
### Step 3: Identify Misleading Metrics
|
|
42
|
+
Look for patterns that could give false confidence:
|
|
43
|
+
- Recently closed issues that have no associated commits or PRs
|
|
44
|
+
- Test files that exist but don't test the claimed feature
|
|
45
|
+
- Documentation that claims completion without corresponding code
|
|
46
|
+
- Issues closed with "not planned" that might have been legitimate work items
|
|
47
|
+
|
|
48
|
+
### Step 4: Report
|
|
49
|
+
Call `report_implementation_review` with:
|
|
50
|
+
- **elements**: Each mission element with its trace results
|
|
51
|
+
- **gaps**: Specific missing pieces with severity ratings
|
|
52
|
+
- **advice**: One English sentence summarising completeness
|
|
53
|
+
- **misleadingMetrics**: Any metrics that don't reflect reality
|
|
54
|
+
|
|
55
|
+
## Severity Guide
|
|
56
|
+
|
|
57
|
+
- **critical**: Mission element is not implemented at all, or a core feature has no tests
|
|
58
|
+
- **moderate**: Feature exists and has some tests but coverage is incomplete, or the website doesn't expose it
|
|
59
|
+
- **low**: Minor coverage gaps, documentation issues, or cosmetic concerns
|
|
60
|
+
|
|
61
|
+
## Output
|
|
62
|
+
|
|
63
|
+
You MUST call `report_implementation_review` exactly once with your complete findings.
|
|
@@ -18,7 +18,17 @@ Look at which metrics are NOT MET — these tell you what gaps remain:
|
|
|
18
18
|
5. Source TODO count > 0 → create an issue to resolve TODOs
|
|
19
19
|
6. Budget near exhaustion → be strategic with remaining transforms
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
7. Implementation review gaps → create issues with label `implementation-gap` for critical gaps
|
|
22
|
+
|
|
23
|
+
If all metrics show MET/OK and no implementation review gaps exist, use `nop` — the director will handle the rest.
|
|
24
|
+
|
|
25
|
+
### Implementation Review
|
|
26
|
+
|
|
27
|
+
If an **Implementation Review** section is present in the prompt, examine it carefully. The review traces each mission element through source code, tests, website, and behaviour tests. It provides ground-truth evidence of what is actually implemented — not just what metrics suggest.
|
|
28
|
+
|
|
29
|
+
- **Critical gaps** should result in creating a focused issue (label: `implementation-gap`) that describes exactly what is missing
|
|
30
|
+
- **Moderate gaps** should be noted but may not need immediate action
|
|
31
|
+
- **Misleading metrics** should inform your decision-making — don't take actions based on metrics the review has flagged as unreliable
|
|
22
32
|
|
|
23
33
|
## Priority Order
|
|
24
34
|
|
|
@@ -178,16 +178,16 @@ jobs:
|
|
|
178
178
|
echo "dry-run=${DRY_RUN}" >> $GITHUB_OUTPUT
|
|
179
179
|
CONFIG='${{ inputs.config-path }}'
|
|
180
180
|
echo "config-path=${CONFIG:-${{ env.configPath }}}" >> $GITHUB_OUTPUT
|
|
181
|
-
# Bot config: log
|
|
182
|
-
|
|
181
|
+
# Bot config: log prefix, log branch, screenshot file
|
|
182
|
+
LOG_PREFIX=""
|
|
183
183
|
LOG_BRANCH=""
|
|
184
184
|
SCREENSHOT=""
|
|
185
185
|
if [ -f "${{ env.configPath }}" ]; then
|
|
186
|
-
|
|
186
|
+
LOG_PREFIX=$(grep '^\s*log-prefix' "${{ env.configPath }}" | head -1 | sed 's/.*= *"\([^"]*\)".*/\1/' || true)
|
|
187
187
|
LOG_BRANCH=$(grep '^\s*log-branch' "${{ env.configPath }}" | head -1 | sed 's/.*= *"\([^"]*\)".*/\1/' || true)
|
|
188
188
|
SCREENSHOT=$(grep '^\s*screenshot-file' "${{ env.configPath }}" | head -1 | sed 's/.*= *"\([^"]*\)".*/\1/' || true)
|
|
189
189
|
fi
|
|
190
|
-
echo "log-
|
|
190
|
+
echo "log-prefix=${LOG_PREFIX:-agent-log-}" >> $GITHUB_OUTPUT
|
|
191
191
|
echo "log-branch=${LOG_BRANCH:-agentic-lib-logs}" >> $GITHUB_OUTPUT
|
|
192
192
|
echo "screenshot-file=${SCREENSHOT:-SCREENSHOT_INDEX.png}" >> $GITHUB_OUTPUT
|
|
193
193
|
outputs:
|
|
@@ -200,7 +200,7 @@ jobs:
|
|
|
200
200
|
pr-number: ${{ steps.normalise.outputs.pr-number }}
|
|
201
201
|
dry-run: ${{ steps.normalise.outputs.dry-run }}
|
|
202
202
|
config-path: ${{ steps.normalise.outputs.config-path }}
|
|
203
|
-
log-
|
|
203
|
+
log-prefix: ${{ steps.normalise.outputs.log-prefix }}
|
|
204
204
|
log-branch: ${{ steps.normalise.outputs.log-branch }}
|
|
205
205
|
screenshot-file: ${{ steps.normalise.outputs.screenshot-file }}
|
|
206
206
|
|
|
@@ -373,7 +373,6 @@ jobs:
|
|
|
373
373
|
id: gather
|
|
374
374
|
uses: actions/github-script@v8
|
|
375
375
|
env:
|
|
376
|
-
LOG_FILE: ${{ needs.params.outputs.log-file }}
|
|
377
376
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
378
377
|
with:
|
|
379
378
|
script: |
|
|
@@ -478,32 +477,28 @@ jobs:
|
|
|
478
477
|
const missionComplete = fs.existsSync('MISSION_COMPLETE.md');
|
|
479
478
|
const missionFailed = fs.existsSync('MISSION_FAILED.md');
|
|
480
479
|
|
|
481
|
-
// Activity log stats (
|
|
482
|
-
const logFile = process.env.LOG_FILE || 'intentïon.md';
|
|
480
|
+
// Activity log stats (from agent-log files on log branch)
|
|
483
481
|
const logBranch = process.env.LOG_BRANCH || 'agentic-lib-logs';
|
|
484
482
|
let activityStats = null;
|
|
485
483
|
try {
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
484
|
+
const { data: tree } = await github.rest.git.getTree({
|
|
485
|
+
owner, repo, tree_sha: logBranch, recursive: false,
|
|
486
|
+
});
|
|
487
|
+
const logFiles = tree.tree
|
|
488
|
+
.filter(f => f.path.startsWith('agent-log-') && f.path.endsWith('.md'))
|
|
489
|
+
.sort((a, b) => a.path.localeCompare(b.path));
|
|
490
|
+
activityStats = { entries: logFiles.length, totalTransformCost: 0 };
|
|
491
|
+
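// Sum costs from the most recent 10 log files only (entries above counts ALL log files, so totalTransformCost is a recent-window figure, not a lifetime total)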
|
|
492
|
+
const recent = logFiles.slice(-10);
|
|
493
|
+
for (const lf of recent) {
|
|
489
494
|
try {
|
|
490
495
|
const { data } = await github.rest.repos.getContent({
|
|
491
|
-
owner, repo, path: lf, ref: logBranch,
|
|
496
|
+
owner, repo, path: lf.path, ref: logBranch,
|
|
492
497
|
});
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
// Fall back to local file
|
|
498
|
-
if (!log) {
|
|
499
|
-
const logPath = fs.existsSync(logFile) ? logFile : (fs.existsSync('intention.md') ? 'intention.md' : null);
|
|
500
|
-
if (logPath) log = fs.readFileSync(logPath, 'utf8');
|
|
501
|
-
}
|
|
502
|
-
if (log) {
|
|
503
|
-
const entries = log.split('\n## ').length - 1;
|
|
504
|
-
const costMatches = [...log.matchAll(/\*\*agentic-lib transformation cost:\*\* (\d+)/g)];
|
|
505
|
-
const totalCost = costMatches.reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
506
|
-
activityStats = { entries, totalTransformCost: totalCost };
|
|
498
|
+
const content = Buffer.from(data.content, 'base64').toString('utf8');
|
|
499
|
+
const costMatches = [...content.matchAll(/\*\*agentic-lib transformation cost:\*\* (\d+)/g)];
|
|
500
|
+
activityStats.totalTransformCost += costMatches.reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
501
|
+
} catch { /* skip unreadable files */ }
|
|
507
502
|
}
|
|
508
503
|
} catch (e) {}
|
|
509
504
|
|
|
@@ -592,13 +587,10 @@ jobs:
|
|
|
592
587
|
|
|
593
588
|
- name: Fetch log and screenshot from log branch
|
|
594
589
|
env:
|
|
595
|
-
LOG_FILE: ${{ needs.params.outputs.log-file }}
|
|
596
590
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
597
591
|
SCREENSHOT_FILE: ${{ needs.params.outputs.screenshot-file }}
|
|
598
592
|
run: |
|
|
599
|
-
|
|
600
|
-
git show "origin/${LOG_BRANCH}:${f}" > "$f" 2>/dev/null || true
|
|
601
|
-
done
|
|
593
|
+
git show "origin/${LOG_BRANCH}:${SCREENSHOT_FILE}" > "${SCREENSHOT_FILE}" 2>/dev/null || true
|
|
602
594
|
|
|
603
595
|
- name: Check mission-complete signal
|
|
604
596
|
id: mission-check
|
|
@@ -708,11 +700,76 @@ jobs:
|
|
|
708
700
|
if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
|
|
709
701
|
env:
|
|
710
702
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
711
|
-
run: bash .github/agentic-lib/scripts/push-to-logs.sh
|
|
703
|
+
run: bash .github/agentic-lib/scripts/push-to-logs.sh agent-log-*.md
|
|
704
|
+
|
|
705
|
+
# ─── Implementation Review: traces mission elements through code/tests/website ──
|
|
706
|
+
implementation-review:
|
|
707
|
+
needs: [params]
|
|
708
|
+
if: |
|
|
709
|
+
!cancelled() &&
|
|
710
|
+
(needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'maintain-only') &&
|
|
711
|
+
needs.params.result == 'success'
|
|
712
|
+
runs-on: ubuntu-latest
|
|
713
|
+
outputs:
|
|
714
|
+
review-advice: ${{ steps.review.outputs.completeness-advice }}
|
|
715
|
+
review-gaps: ${{ steps.review.outputs.gaps }}
|
|
716
|
+
steps:
|
|
717
|
+
- uses: actions/checkout@v6
|
|
718
|
+
with:
|
|
719
|
+
fetch-depth: 0
|
|
720
|
+
ref: ${{ inputs.ref || github.sha }}
|
|
721
|
+
|
|
722
|
+
- name: Fetch log and agent logs from log branch
|
|
723
|
+
env:
|
|
724
|
+
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
725
|
+
SCREENSHOT_FILE: ${{ needs.params.outputs.screenshot-file }}
|
|
726
|
+
run: |
|
|
727
|
+
git show "origin/${LOG_BRANCH}:${SCREENSHOT_FILE}" > "${SCREENSHOT_FILE}" 2>/dev/null || true
|
|
728
|
+
mkdir -p .agent-logs
|
|
729
|
+
git fetch origin "${LOG_BRANCH}" 2>/dev/null || true
|
|
730
|
+
for f in $(git ls-tree --name-only "origin/${LOG_BRANCH}" 2>/dev/null | grep '^agent-log-' || true); do
|
|
731
|
+
git show "origin/${LOG_BRANCH}:${f}" > ".agent-logs/${f}" 2>/dev/null || true
|
|
732
|
+
done
|
|
733
|
+
echo "Fetched $(ls .agent-logs/agent-log-*.md 2>/dev/null | wc -l | tr -d ' ') agent log files"
|
|
734
|
+
|
|
735
|
+
- uses: actions/setup-node@v6
|
|
736
|
+
with:
|
|
737
|
+
node-version: "24"
|
|
738
|
+
|
|
739
|
+
- name: Self-init (agentic-lib dev only)
|
|
740
|
+
if: hashFiles('scripts/self-init.sh') != '' && hashFiles('.github/agentic-lib/actions/agentic-step/package.json') == ''
|
|
741
|
+
run: bash scripts/self-init.sh
|
|
742
|
+
|
|
743
|
+
- name: Install agentic-step dependencies
|
|
744
|
+
working-directory: .github/agentic-lib/actions/agentic-step
|
|
745
|
+
run: |
|
|
746
|
+
npm ci
|
|
747
|
+
if [ -d "../../copilot" ]; then
|
|
748
|
+
ln -sf "$(pwd)/node_modules" ../../copilot/node_modules
|
|
749
|
+
fi
|
|
750
|
+
|
|
751
|
+
- name: Run implementation review
|
|
752
|
+
id: review
|
|
753
|
+
if: github.repository != 'xn-intenton-z2a/agentic-lib'
|
|
754
|
+
uses: ./.github/agentic-lib/actions/agentic-step
|
|
755
|
+
env:
|
|
756
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
757
|
+
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
|
|
758
|
+
with:
|
|
759
|
+
task: "implementation-review"
|
|
760
|
+
config: ${{ needs.params.outputs.config-path }}
|
|
761
|
+
instructions: ".github/agents/agent-implementation-review.md"
|
|
762
|
+
model: ${{ needs.params.outputs.model }}
|
|
763
|
+
|
|
764
|
+
- name: Push log to log branch
|
|
765
|
+
if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
|
|
766
|
+
env:
|
|
767
|
+
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
768
|
+
run: bash .github/agentic-lib/scripts/push-to-logs.sh agent-log-*.md
|
|
712
769
|
|
|
713
770
|
# ─── Director: LLM evaluates mission status (complete/failed/in-progress) ──
|
|
714
771
|
director:
|
|
715
|
-
needs: [params, telemetry, maintain]
|
|
772
|
+
needs: [params, telemetry, maintain, implementation-review]
|
|
716
773
|
if: |
|
|
717
774
|
!cancelled() &&
|
|
718
775
|
(needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'dev-only') &&
|
|
@@ -729,13 +786,10 @@ jobs:
|
|
|
729
786
|
|
|
730
787
|
- name: Fetch log and screenshot from log branch
|
|
731
788
|
env:
|
|
732
|
-
LOG_FILE: ${{ needs.params.outputs.log-file }}
|
|
733
789
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
734
790
|
SCREENSHOT_FILE: ${{ needs.params.outputs.screenshot-file }}
|
|
735
791
|
run: |
|
|
736
|
-
|
|
737
|
-
git show "origin/${LOG_BRANCH}:${f}" > "$f" 2>/dev/null || true
|
|
738
|
-
done
|
|
792
|
+
git show "origin/${LOG_BRANCH}:${SCREENSHOT_FILE}" > "${SCREENSHOT_FILE}" 2>/dev/null || true
|
|
739
793
|
|
|
740
794
|
- uses: actions/setup-node@v6
|
|
741
795
|
with:
|
|
@@ -760,6 +814,8 @@ jobs:
|
|
|
760
814
|
env:
|
|
761
815
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
762
816
|
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
|
|
817
|
+
REVIEW_ADVICE: ${{ needs.implementation-review.outputs.review-advice }}
|
|
818
|
+
REVIEW_GAPS: ${{ needs.implementation-review.outputs.review-gaps }}
|
|
763
819
|
with:
|
|
764
820
|
task: "direct"
|
|
765
821
|
config: ${{ needs.params.outputs.config-path }}
|
|
@@ -770,11 +826,11 @@ jobs:
|
|
|
770
826
|
if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
|
|
771
827
|
env:
|
|
772
828
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
773
|
-
run: bash .github/agentic-lib/scripts/push-to-logs.sh
|
|
829
|
+
run: bash .github/agentic-lib/scripts/push-to-logs.sh agent-log-*.md
|
|
774
830
|
|
|
775
831
|
# ─── Supervisor: LLM decides what to do (after director evaluates) ──
|
|
776
832
|
supervisor:
|
|
777
|
-
needs: [params, pr-cleanup, telemetry, maintain, director]
|
|
833
|
+
needs: [params, pr-cleanup, telemetry, maintain, implementation-review, director]
|
|
778
834
|
if: |
|
|
779
835
|
!cancelled() &&
|
|
780
836
|
(needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'dev-only') &&
|
|
@@ -790,13 +846,10 @@ jobs:
|
|
|
790
846
|
|
|
791
847
|
- name: Fetch log and screenshot from log branch
|
|
792
848
|
env:
|
|
793
|
-
LOG_FILE: ${{ needs.params.outputs.log-file }}
|
|
794
849
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
795
850
|
SCREENSHOT_FILE: ${{ needs.params.outputs.screenshot-file }}
|
|
796
851
|
run: |
|
|
797
|
-
|
|
798
|
-
git show "origin/${LOG_BRANCH}:${f}" > "$f" 2>/dev/null || true
|
|
799
|
-
done
|
|
852
|
+
git show "origin/${LOG_BRANCH}:${SCREENSHOT_FILE}" > "${SCREENSHOT_FILE}" 2>/dev/null || true
|
|
800
853
|
|
|
801
854
|
- uses: actions/setup-node@v6
|
|
802
855
|
with:
|
|
@@ -821,6 +874,8 @@ jobs:
|
|
|
821
874
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
822
875
|
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
|
|
823
876
|
INPUT_MESSAGE: ${{ needs.params.outputs.message }}
|
|
877
|
+
REVIEW_ADVICE: ${{ needs.implementation-review.outputs.review-advice }}
|
|
878
|
+
REVIEW_GAPS: ${{ needs.implementation-review.outputs.review-gaps }}
|
|
824
879
|
with:
|
|
825
880
|
task: "supervise"
|
|
826
881
|
config: ${{ needs.params.outputs.config-path }}
|
|
@@ -831,7 +886,7 @@ jobs:
|
|
|
831
886
|
if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
|
|
832
887
|
env:
|
|
833
888
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
834
|
-
run: bash .github/agentic-lib/scripts/push-to-logs.sh
|
|
889
|
+
run: bash .github/agentic-lib/scripts/push-to-logs.sh agent-log-*.md
|
|
835
890
|
|
|
836
891
|
# ─── Fix stuck PRs with failing checks ─────────────────────────────
|
|
837
892
|
fix-stuck:
|
|
@@ -1227,13 +1282,10 @@ jobs:
|
|
|
1227
1282
|
|
|
1228
1283
|
- name: Fetch log and screenshot from log branch
|
|
1229
1284
|
env:
|
|
1230
|
-
LOG_FILE: ${{ needs.params.outputs.log-file }}
|
|
1231
1285
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
1232
1286
|
SCREENSHOT_FILE: ${{ needs.params.outputs.screenshot-file }}
|
|
1233
1287
|
run: |
|
|
1234
|
-
|
|
1235
|
-
git show "origin/${LOG_BRANCH}:${f}" > "$f" 2>/dev/null || true
|
|
1236
|
-
done
|
|
1288
|
+
git show "origin/${LOG_BRANCH}:${SCREENSHOT_FILE}" > "${SCREENSHOT_FILE}" 2>/dev/null || true
|
|
1237
1289
|
|
|
1238
1290
|
- uses: actions/setup-node@v6
|
|
1239
1291
|
with:
|
|
@@ -1403,7 +1455,7 @@ jobs:
|
|
|
1403
1455
|
if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
|
|
1404
1456
|
env:
|
|
1405
1457
|
LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
|
|
1406
|
-
run: bash .github/agentic-lib/scripts/push-to-logs.sh
|
|
1458
|
+
run: bash .github/agentic-lib/scripts/push-to-logs.sh agent-log-*.md
|
|
1407
1459
|
|
|
1408
1460
|
- name: Create PR and attempt merge
|
|
1409
1461
|
if: github.repository != 'xn-intenton-z2a/agentic-lib' && steps.issue.outputs.issue-number != '' && needs.params.outputs.dry-run != 'true' && steps.pre-commit-test.outputs.tests-passed == 'true' && steps.pre-commit-behaviour-test.outputs.tests-passed != 'false'
|
package/agentic-lib.toml
CHANGED
|
@@ -114,6 +114,6 @@ min-dedicated-tests = 0 # minimum test files that import from src/lib
|
|
|
114
114
|
max-source-todos = 0 # max TODO comments allowed in ./src (0 = none)
|
|
115
115
|
|
|
116
116
|
[bot]
|
|
117
|
-
log-
|
|
118
|
-
log-branch = "agentic-lib-logs"
|
|
117
|
+
log-prefix = "tmp/agent-log-" #@dist "agent-log-"
|
|
118
|
+
log-branch = "main" #@dist "agentic-lib-logs"
|
|
119
119
|
screenshot-file = "SCREENSHOT_INDEX.png"
|
package/bin/agentic-lib.js
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
// npx @xn-intenton-z2a/agentic-lib maintain-library
|
|
15
15
|
// npx @xn-intenton-z2a/agentic-lib fix-code
|
|
16
16
|
|
|
17
|
-
import { copyFileSync, existsSync, mkdirSync, rmSync, rmdirSync, readdirSync, readFileSync, writeFileSync } from "fs";
|
|
17
|
+
import { copyFileSync, existsSync, mkdirSync, rmSync, rmdirSync, readdirSync, readFileSync, writeFileSync, unlinkSync } from "fs";
|
|
18
18
|
import { applyDistTransform } from "../src/dist-transform.js";
|
|
19
19
|
import { resolve, dirname, join } from "path";
|
|
20
20
|
import { fileURLToPath } from "url";
|
|
@@ -785,6 +785,19 @@ function initPurge(seedsDir, missionName, initTimestamp) {
|
|
|
785
785
|
initTransformFile(tomlSource, resolve(target, "agentic-lib.toml"), "SEED: agentic-lib.toml (transformed)");
|
|
786
786
|
}
|
|
787
787
|
|
|
788
|
+
// Clear agent log files (written by implementation-review and other tasks)
|
|
789
|
+
try {
|
|
790
|
+
const agentLogs = readdirSync(target).filter((f) => f.startsWith("agent-log-") && f.endsWith(".md"));
|
|
791
|
+
for (const f of agentLogs) {
|
|
792
|
+
console.log(` DELETE: ${f} (agent log)`);
|
|
793
|
+
if (!dryRun) {
|
|
794
|
+
unlinkSync(resolve(target, f));
|
|
795
|
+
}
|
|
796
|
+
initChanges++;
|
|
797
|
+
}
|
|
798
|
+
if (agentLogs.length > 0) console.log(` Cleared ${agentLogs.length} agent log file(s)`);
|
|
799
|
+
} catch { /* ignore — directory may not have agent logs */ }
|
|
800
|
+
|
|
788
801
|
// Copy mission seed file as MISSION.md
|
|
789
802
|
const missionsDir = resolve(seedsDir, "missions");
|
|
790
803
|
const missionFile = resolve(missionsDir, `${missionName}.md`);
|
package/package.json
CHANGED
|
@@ -8,11 +8,11 @@
|
|
|
8
8
|
import * as core from "@actions/core";
|
|
9
9
|
import * as github from "@actions/github";
|
|
10
10
|
import { loadConfig, getWritablePaths } from "./config-loader.js";
|
|
11
|
-
import {
|
|
12
|
-
import { readFileSync, existsSync } from "fs";
|
|
11
|
+
import { generateClosingNotes, writeAgentLog } from "./logging.js";
|
|
12
|
+
import { readFileSync, existsSync, readdirSync } from "fs";
|
|
13
13
|
import {
|
|
14
14
|
buildMissionMetrics, buildMissionReadiness,
|
|
15
|
-
computeTransformationCost,
|
|
15
|
+
computeTransformationCost, buildLimitsStatus,
|
|
16
16
|
} from "../../copilot/telemetry.js";
|
|
17
17
|
import {
|
|
18
18
|
checkInstabilityLabel, countDedicatedTests,
|
|
@@ -30,12 +30,14 @@ import { reviewIssue } from "./tasks/review-issue.js";
|
|
|
30
30
|
import { discussions } from "./tasks/discussions.js";
|
|
31
31
|
import { supervise } from "./tasks/supervise.js";
|
|
32
32
|
import { direct } from "./tasks/direct.js";
|
|
33
|
+
import { implementationReview } from "./tasks/implementation-review.js";
|
|
33
34
|
|
|
34
35
|
const TASKS = {
|
|
35
36
|
"resolve-issue": resolveIssue, "fix-code": fixCode, "transform": transform,
|
|
36
37
|
"maintain-features": maintainFeatures, "maintain-library": maintainLibrary,
|
|
37
38
|
"enhance-issue": enhanceIssue, "review-issue": reviewIssue,
|
|
38
39
|
"discussions": discussions, "supervise": supervise, "direct": direct,
|
|
40
|
+
"implementation-review": implementationReview,
|
|
39
41
|
};
|
|
40
42
|
|
|
41
43
|
async function run() {
|
|
@@ -61,9 +63,22 @@ async function run() {
|
|
|
61
63
|
if (!handler) throw new Error(`Unknown task: ${task}. Available: ${Object.keys(TASKS).join(", ")}`);
|
|
62
64
|
|
|
63
65
|
// Resolve log and screenshot paths (fetched from agentic-lib-logs branch by workflow)
|
|
64
|
-
const
|
|
66
|
+
const logPrefix = config.intentionBot?.logPrefix || "agent-log-";
|
|
65
67
|
const screenshotFile = config.intentionBot?.screenshotFile || "SCREENSHOT_INDEX.png";
|
|
66
|
-
|
|
68
|
+
// Find the most recent agent-log file matching the prefix for LLM context
|
|
69
|
+
const logDir = logPrefix.includes("/") ? logPrefix.substring(0, logPrefix.lastIndexOf("/")) : ".";
|
|
70
|
+
const logBase = logPrefix.includes("/") ? logPrefix.substring(logPrefix.lastIndexOf("/") + 1) : logPrefix;
|
|
71
|
+
let logFilePath = null;
|
|
72
|
+
try {
|
|
73
|
+
const logFiles = readdirSync(logDir)
|
|
74
|
+
.filter(f => f.startsWith(logBase) && f.endsWith(".md"))
|
|
75
|
+
.sort();
|
|
76
|
+
if (logFiles.length > 0) {
|
|
77
|
+
const newest = logFiles[logFiles.length - 1];
|
|
78
|
+
const candidate = logDir === "." ? newest : `${logDir}/${newest}`;
|
|
79
|
+
if (existsSync(candidate)) logFilePath = candidate;
|
|
80
|
+
}
|
|
81
|
+
} catch { /* no log files yet */ }
|
|
67
82
|
const screenshotFilePath = existsSync(screenshotFile) ? screenshotFile : null;
|
|
68
83
|
|
|
69
84
|
const context = {
|
|
@@ -94,8 +109,7 @@ async function run() {
|
|
|
94
109
|
&& await checkInstabilityLabel(context, issueNumber);
|
|
95
110
|
if (isInstability) core.info(`Issue #${issueNumber} has instability label — does not count against budget`);
|
|
96
111
|
const transformationCost = computeTransformationCost(task, result.outcome, isInstability);
|
|
97
|
-
const
|
|
98
|
-
const cumulativeCost = readCumulativeCost(intentionFilepath) + transformationCost;
|
|
112
|
+
const cumulativeCost = transformationCost;
|
|
99
113
|
|
|
100
114
|
if (result.dedicatedTestCount == null || result.dedicatedTestCount === 0) {
|
|
101
115
|
try {
|
|
@@ -121,21 +135,18 @@ async function run() {
|
|
|
121
135
|
|
|
122
136
|
const missionMetrics = buildMissionMetrics(config, result, limitsStatus, cumulativeCost, featureIssueCount, maintenanceIssueCount);
|
|
123
137
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
profile: config.tuning?.profileName || "unknown",
|
|
133
|
-
changes: result.changes, contextNotes: result.contextNotes,
|
|
134
|
-
limitsStatus, promptBudget: result.promptBudget,
|
|
135
|
-
missionReadiness: buildMissionReadiness(missionMetrics),
|
|
136
|
-
missionMetrics, closingNotes: result.closingNotes || generateClosingNotes(limitsStatus),
|
|
137
|
-
transformationCost, narrative: result.narrative,
|
|
138
|
+
// Write standalone agent log file (pushed to agentic-lib-logs branch by workflow)
|
|
139
|
+
try {
|
|
140
|
+
const agentLogFile = writeAgentLog({
|
|
141
|
+
task, outcome: result.outcome || "completed",
|
|
142
|
+
model: result.model || model, durationMs, tokensUsed: result.tokensUsed,
|
|
143
|
+
narrative: result.narrative, contextNotes: result.contextNotes,
|
|
144
|
+
reviewTable: result.reviewTable, completenessAdvice: result.completenessAdvice,
|
|
145
|
+
missionMetrics,
|
|
138
146
|
});
|
|
147
|
+
core.info(`Agent log written: ${agentLogFile}`);
|
|
148
|
+
} catch (err) {
|
|
149
|
+
core.warning(`Could not write agent log: ${err.message}`);
|
|
139
150
|
}
|
|
140
151
|
|
|
141
152
|
core.info(`agentic-step completed: outcome=${result.outcome}`);
|
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
// Appends structured entries to the intentïon.md activity log,
|
|
6
6
|
// including commit URLs and safety-check outcomes.
|
|
7
7
|
|
|
8
|
-
import { writeFileSync, readFileSync, appendFileSync, existsSync, mkdirSync,
|
|
9
|
-
import { dirname
|
|
8
|
+
import { writeFileSync, readFileSync, appendFileSync, existsSync, mkdirSync, readdirSync } from "fs";
|
|
9
|
+
import { dirname } from "path";
|
|
10
10
|
import { join } from "path";
|
|
11
11
|
import * as core from "@actions/core";
|
|
12
12
|
|
|
@@ -160,16 +160,87 @@ export function logActivity({
|
|
|
160
160
|
writeFileSync(filepath, `# intentïon Activity Log\n${entry}`);
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
 * Write a standalone agent log file for a single task execution.
 *
 * The file is named `agent-log-<stamp>.md`, where the stamp is the current ISO
 * timestamp with ":" and "." replaced by "-" so it is filesystem-safe. It is
 * written relative to the current working directory.
 *
 * Table cells (review table and mission metrics) are escaped so that a "|" or a
 * line break inside free text cannot split or terminate the Markdown table row;
 * missing metric fields render as an empty cell rather than "undefined".
 *
 * @param {Object} options
 * @param {string} options.task - The task name
 * @param {string} options.outcome - The task outcome
 * @param {string} [options.model] - Model used
 * @param {number} [options.durationMs] - Task duration in milliseconds
 * @param {string} [options.narrative] - LLM-generated narrative
 * @param {Array} [options.reviewTable] - Implementation review table rows
 * @param {string} [options.completenessAdvice] - English completeness assessment
 * @param {string} [options.contextNotes] - Additional context notes
 * @param {Array} [options.missionMetrics] - Mission metrics entries
 * @param {number} [options.tokensUsed] - Total tokens consumed
 * @returns {string} The filename of the written log file
 */
export function writeAgentLog({
  task, outcome, model, durationMs, narrative,
  reviewTable, completenessAdvice, contextNotes,
  missionMetrics, tokensUsed,
}) {
  // Keep a value on a single table row: escape column separators, flatten line breaks.
  const cell = (value) => String(value ?? "").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");

  const now = new Date();
  const isoNow = now.toISOString();
  const stamp = isoNow.replace(/:/g, "-").replace(/\./g, "-");
  const filename = `agent-log-${stamp}.md`;

  const parts = [
    `# Agent Log: ${task} at ${isoNow}`,
    "",
    "## Summary",
    `**Task:** ${task}`,
    `**Outcome:** ${outcome}`,
  ];

  // Optional summary fields are omitted when absent (or zero).
  if (model) parts.push(`**Model:** ${model}`);
  if (tokensUsed) parts.push(`**Tokens:** ${tokensUsed}`);
  if (durationMs) {
    const secs = Math.round(durationMs / 1000);
    parts.push(`**Duration:** ${secs}s`);
  }

  if (reviewTable && reviewTable.length > 0) {
    parts.push("", "## Implementation Review");
    parts.push("| Element | Implemented | Unit Tested | Behaviour Tested | Website Used | Notes |");
    parts.push("|---------|-------------|-------------|------------------|--------------|-------|");
    for (const row of reviewTable) {
      const cells = [
        row.element || "",
        row.implemented || "",
        row.unitTested || "",
        row.behaviourTested || "",
        row.websiteUsed || "",
        row.notes || "",
      ].map(cell);
      parts.push(`| ${cells.join(" | ")} |`);
    }
  }

  if (completenessAdvice) {
    parts.push("", "## Completeness Assessment");
    parts.push(completenessAdvice);
  }

  if (missionMetrics && missionMetrics.length > 0) {
    parts.push("", "## Mission Metrics");
    parts.push("| Metric | Value | Target | Status |");
    parts.push("|--------|-------|--------|--------|");
    for (const m of missionMetrics) {
      parts.push(`| ${cell(m.metric)} | ${cell(m.value)} | ${cell(m.target)} | ${cell(m.status)} |`);
    }
  }

  if (narrative) {
    parts.push("", "## Narrative");
    parts.push(narrative);
  }

  if (contextNotes) {
    parts.push("", "## Context Notes");
    parts.push(contextNotes);
  }

  parts.push("", "---");
  parts.push(`Generated by agentic-step ${task} at ${isoNow}`);

  writeFileSync(filename, parts.join("\n"));
  return filename;
}
|
|
174
245
|
|
|
175
246
|
/**
|
|
@@ -71,6 +71,14 @@ function buildMetricAssessment(ctx, config) {
|
|
|
71
71
|
const minTests = thresholds.minDedicatedTests ?? 1;
|
|
72
72
|
const maxTodos = thresholds.maxSourceTodos ?? 0;
|
|
73
73
|
|
|
74
|
+
// Implementation review gaps (passed from workflow via env)
|
|
75
|
+
let reviewGaps = [];
|
|
76
|
+
try {
|
|
77
|
+
const gapsJson = process.env.REVIEW_GAPS;
|
|
78
|
+
if (gapsJson) reviewGaps = JSON.parse(gapsJson);
|
|
79
|
+
} catch { /* ignore parse errors */ }
|
|
80
|
+
const criticalGaps = reviewGaps.filter((g) => g.severity === "critical");
|
|
81
|
+
|
|
74
82
|
const metrics = [
|
|
75
83
|
{ metric: "Open issues", value: ctx.issuesSummary.length, target: 0, met: ctx.issuesSummary.length === 0 },
|
|
76
84
|
{ metric: "Open PRs", value: ctx.prsSummary.length, target: 0, met: ctx.prsSummary.length === 0 },
|
|
@@ -78,6 +86,7 @@ function buildMetricAssessment(ctx, config) {
|
|
|
78
86
|
{ metric: "Dedicated tests", value: ctx.dedicatedTestCount, target: minTests, met: ctx.dedicatedTestCount >= minTests },
|
|
79
87
|
{ metric: "Source TODOs", value: ctx.sourceTodoCount, target: maxTodos, met: ctx.sourceTodoCount <= maxTodos },
|
|
80
88
|
{ metric: "Budget", value: ctx.cumulativeTransformationCost, target: ctx.transformationBudget || "unlimited", met: !(ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget) },
|
|
89
|
+
{ metric: "Implementation review", value: criticalGaps.length === 0 ? "No critical gaps" : `${criticalGaps.length} critical gap(s)`, target: "No critical gaps", met: criticalGaps.length === 0 },
|
|
81
90
|
];
|
|
82
91
|
|
|
83
92
|
const allMet = metrics.every((m) => m.met);
|
|
@@ -124,10 +133,29 @@ function buildPrompt(ctx, agentInstructions, metricAssessment) {
|
|
|
124
133
|
`Source TODOs: ${ctx.sourceTodoCount}`,
|
|
125
134
|
`Transformation budget: ${ctx.cumulativeTransformationCost}/${ctx.transformationBudget || "unlimited"}`,
|
|
126
135
|
"",
|
|
136
|
+
...(process.env.REVIEW_ADVICE ? [
|
|
137
|
+
"## Implementation Review",
|
|
138
|
+
`**Completeness:** ${process.env.REVIEW_ADVICE}`,
|
|
139
|
+
...((() => {
|
|
140
|
+
try {
|
|
141
|
+
const gaps = JSON.parse(process.env.REVIEW_GAPS || "[]");
|
|
142
|
+
if (gaps.length > 0) {
|
|
143
|
+
return [
|
|
144
|
+
"",
|
|
145
|
+
"### Gaps Found",
|
|
146
|
+
...gaps.map((g) => `- [${g.severity}] ${g.element}: ${g.description} (${g.gapType})`),
|
|
147
|
+
];
|
|
148
|
+
}
|
|
149
|
+
} catch { /* ignore */ }
|
|
150
|
+
return [];
|
|
151
|
+
})()),
|
|
152
|
+
"",
|
|
153
|
+
] : []),
|
|
127
154
|
"## Your Task",
|
|
128
155
|
"Use list_issues and list_prs to review open work items.",
|
|
129
156
|
"Use read_file to inspect source code and tests for completeness.",
|
|
130
157
|
"Use git_diff or git_status for additional context if needed.",
|
|
158
|
+
"Consider the implementation review findings — if critical gaps exist, do NOT declare mission-complete.",
|
|
131
159
|
"Then call report_director_decision with your determination.",
|
|
132
160
|
"",
|
|
133
161
|
"**You MUST call report_director_decision exactly once.**",
|
|
@@ -233,10 +261,17 @@ export async function direct(context) {
|
|
|
233
261
|
|
|
234
262
|
// --- Gather context (similar to supervisor but focused on metrics) ---
|
|
235
263
|
const mission = readOptionalFile(config.paths.mission.path);
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
264
|
+
// Read cumulative cost from agent-log files
|
|
265
|
+
let cumulativeTransformationCost = 0;
|
|
266
|
+
try {
|
|
267
|
+
const { readdirSync } = await import("fs");
|
|
268
|
+
const logFiles = readdirSync(".").filter(f => f.startsWith("agent-log-") && f.endsWith(".md")).sort();
|
|
269
|
+
for (const f of logFiles) {
|
|
270
|
+
const content = readOptionalFile(f);
|
|
271
|
+
const costMatches = content.matchAll(/\*\*agentic-lib transformation cost:\*\* (\d+)/g);
|
|
272
|
+
cumulativeTransformationCost += [...costMatches].reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
273
|
+
}
|
|
274
|
+
} catch { /* no agent-log files yet */ }
|
|
240
275
|
|
|
241
276
|
const missionComplete = existsSync("MISSION_COMPLETE.md");
|
|
242
277
|
const missionFailed = existsSync("MISSION_FAILED.md");
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
// SPDX-License-Identifier: GPL-3.0-only
|
|
2
|
+
// Copyright (C) 2025-2026 Polycode Limited
|
|
3
|
+
// tasks/implementation-review.js — Trace mission elements through code, tests, website
|
|
4
|
+
//
|
|
5
|
+
// Uses runCopilotSession with read-only tools to decompose the mission and
|
|
6
|
+
// verify each element is implemented, tested, and presented on the website.
|
|
7
|
+
// Produces a structured review with test-result table and completeness advice.
|
|
8
|
+
|
|
9
|
+
import * as core from "@actions/core";
|
|
10
|
+
import { existsSync, readdirSync } from "fs";
|
|
11
|
+
import { readOptionalFile, extractNarrative, NARRATIVE_INSTRUCTION } from "../copilot.js";
|
|
12
|
+
import { runCopilotSession } from "../../../copilot/copilot-session.js";
|
|
13
|
+
import { createGitHubTools, createGitTools } from "../../../copilot/github-tools.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Build the user prompt for the implementation-review session.
 *
 * Repository paths are taken from `config.paths.<name>.path`, falling back to the
 * conventional defaults. The same resolved paths are used both in the
 * "Repository Paths" section and in the task steps, so a repository with
 * non-default paths is not told to look in `src/lib/` while being shown a
 * different source path a few lines earlier. With default paths the output is
 * identical to the previously hard-coded text.
 *
 * @param {string} mission - Mission text; a placeholder is used when empty
 * @param {Object} config - Loaded configuration (only `config.paths` is read)
 * @param {string} agentInstructions - Agent instructions placed under "## Instructions"
 * @param {string} agentLogsSummary - Optional summary of earlier agent logs; section omitted when empty
 * @returns {string} The prompt, sections joined with newlines
 */
function buildReviewPrompt(mission, config, agentInstructions, agentLogsSummary) {
  const sourcePath = config.paths?.source?.path || "src/lib/";
  const testsPath = config.paths?.tests?.path || "tests/";
  const webPath = config.paths?.web?.path || "src/web/";
  const behaviourPath = config.paths?.behaviour?.path || "tests/behaviour/";
  const featuresPath = config.paths?.features?.path || "features/";

  return [
    "## Instructions",
    agentInstructions,
    "",
    "## Mission",
    mission || "(no mission defined)",
    "",
    "## Repository Paths",
    `- Source: \`${sourcePath}\``,
    `- Tests: \`${testsPath}\``,
    `- Web: \`${webPath}\``,
    `- Behaviour tests: \`${behaviourPath}\``,
    `- Features: \`${featuresPath}\``,
    "",
    ...(agentLogsSummary ? ["## Previous Reviews", agentLogsSummary, ""] : []),
    "## Your Task",
    "1. Read MISSION.md and decompose it into discrete deliverable elements.",
    "2. For each element, use list_files and read_file to trace it through:",
    `   - Source implementation in ${sourcePath}`,
    `   - Unit tests in ${testsPath}`,
    "   - Behaviour tests (Playwright)",
    `   - Website usage in ${webPath} or docs/`,
    "   - Integration path (how website accesses the library)",
    "   - Behaviour test coverage of the website feature",
    "3. Flag misleading patterns:",
    "   - Issues closed without corresponding code changes",
    "   - Tests that don't assert anything meaningful (empty/trivial)",
    "   - Features listed as done in docs but missing from code",
    "   - PRs merged without test coverage for the claimed feature",
    "4. Call report_implementation_review with your findings.",
    "",
    "**You MUST call report_implementation_review exactly once.**",
  ].join("\n");
}
|
|
56
|
+
|
|
57
|
+
/**
 * Implementation review task: decompose mission, trace through code/tests/website.
 *
 * Runs a Copilot session with the write/dispatch tools excluded and no writable
 * paths, and captures the agent's findings through the
 * `report_implementation_review` tool. The findings are published as action
 * outputs (`completeness-advice`, `gaps`, `review-table`) and returned to the caller.
 *
 * Returns a "nop" outcome without starting a session when there is no mission,
 * when MISSION_COMPLETE.md exists (unless `config.supervisor` is "maintenance"),
 * or when MISSION_FAILED.md exists.
 *
 * Tool arguments come from the model, so they are normalised before use:
 * non-array `elements` / `gaps` / `misleadingMetrics` become empty arrays and
 * `advice` is coerced to a string. Missing token counts are treated as 0 so
 * `tokensUsed` is never NaN.
 *
 * @param {Object} context - Task context from index.js
 * @returns {Promise<Object>} Result with outcome, review data, tokensUsed, model
 */
export async function implementationReview(context) {
  const { config, instructions, model, octokit, repo, logFilePath, screenshotFilePath } = context;
  const t = config.tuning || {};

  const mission = readOptionalFile(config.paths.mission.path);
  if (!mission) {
    return { outcome: "nop", details: "No mission defined — skipping implementation review" };
  }

  if (existsSync("MISSION_COMPLETE.md") && config.supervisor !== "maintenance") {
    return { outcome: "nop", details: "Mission already complete" };
  }
  if (existsSync("MISSION_FAILED.md")) {
    return { outcome: "nop", details: "Mission already failed" };
  }

  // Check for previous agent logs
  const agentLogsDir = ".agent-logs";
  let agentLogsSummary = "";
  if (existsSync(agentLogsDir)) {
    try {
      const files = readdirSync(agentLogsDir).filter((f) => f.startsWith("agent-log-") && f.endsWith(".md"));
      if (files.length > 0) {
        agentLogsSummary = `${files.length} previous agent log file(s) available. Use list_files and read_file on .agent-logs/ to review them.`;
      }
    } catch { /* best-effort: the summary is optional context for the prompt */ }
  }

  const agentInstructions = instructions || "Review the implementation completeness of the mission.";
  const prompt = buildReviewPrompt(mission, config, agentInstructions, agentLogsSummary);

  const systemPrompt =
    "You are an implementation review agent for an autonomous coding repository. " +
    "Your job is to trace each mission element through the codebase — verifying that it is " +
    "implemented in source code, covered by unit tests, exercised by behaviour tests, " +
    "presented on the website, and that the behaviour tests verify the website presentation. " +
    "Focus on ground-truth evidence, not metrics. Metrics can be misleading — issues closed " +
    "in error, trivial tests, or features marked done without code all create false confidence." +
    NARRATIVE_INSTRUCTION;

  // Model-supplied tool arguments are untrusted: anything that is not an array is dropped.
  const toArray = (value) => (Array.isArray(value) ? value : []);

  // Shared mutable state to capture the review
  const reviewResult = { elements: [], gaps: [], advice: "", misleadingMetrics: [] };

  const createTools = (defineTool, _wp, logger) => {
    const ghTools = createGitHubTools(octokit, repo, defineTool, logger);
    const gitTools = createGitTools(defineTool, logger);

    const reportReview = defineTool("report_implementation_review", {
      description: "Report the implementation review findings. Call this exactly once with all traced elements, identified gaps, and completeness advice.",
      parameters: {
        type: "object",
        properties: {
          elements: {
            type: "array",
            items: {
              type: "object",
              properties: {
                name: { type: "string", description: "Mission element name" },
                implemented: { type: "boolean", description: "Found in source code" },
                unitTested: { type: "boolean", description: "Has unit test coverage" },
                behaviourTested: { type: "boolean", description: "Has behaviour/Playwright test coverage" },
                websiteUsed: { type: "boolean", description: "Used on the website" },
                integrationPath: { type: "string", description: "How the website accesses this feature" },
                behaviourCoverage: { type: "string", description: "How behaviour tests verify website presentation" },
                notes: { type: "string", description: "Additional observations" },
              },
              required: ["name", "implemented"],
            },
            description: "Mission elements traced through the codebase",
          },
          gaps: {
            type: "array",
            items: {
              type: "object",
              properties: {
                element: { type: "string", description: "Which mission element has the gap" },
                gapType: {
                  type: "string",
                  enum: ["not-implemented", "not-tested", "not-on-website", "no-behaviour-test", "misleading-metric"],
                  description: "Type of gap",
                },
                description: { type: "string", description: "What is missing" },
                severity: {
                  type: "string",
                  enum: ["critical", "moderate", "low"],
                  description: "How important this gap is",
                },
              },
              required: ["element", "gapType", "description", "severity"],
            },
            description: "Identified implementation gaps",
          },
          advice: { type: "string", description: "Single English sentence summarising overall completeness" },
          misleadingMetrics: {
            type: "array",
            items: {
              type: "object",
              properties: {
                metric: { type: "string", description: "Which metric is misleading" },
                reason: { type: "string", description: "Why it is misleading" },
                evidence: { type: "string", description: "What evidence supports this" },
              },
              required: ["metric", "reason"],
            },
            description: "Metrics that may be misleading about actual progress",
          },
        },
        required: ["elements", "gaps", "advice"],
      },
      handler: async ({ elements, gaps, advice, misleadingMetrics }) => {
        reviewResult.elements = toArray(elements);
        reviewResult.gaps = toArray(gaps);
        reviewResult.advice = String(advice ?? "");
        reviewResult.misleadingMetrics = toArray(misleadingMetrics);
        return { textResultForLlm: `Review recorded: ${reviewResult.elements.length} elements traced, ${reviewResult.gaps.length} gaps found` };
      },
    });

    return [...ghTools, ...gitTools, reportReview];
  };

  const attachments = [];
  if (logFilePath) attachments.push({ type: "file", path: logFilePath });
  if (screenshotFilePath) attachments.push({ type: "file", path: screenshotFilePath });

  const result = await runCopilotSession({
    workspacePath: process.cwd(),
    model,
    tuning: t,
    agentPrompt: systemPrompt,
    userPrompt: prompt,
    writablePaths: [],
    createTools,
    attachments,
    excludedTools: ["write_file", "run_command", "run_tests", "dispatch_workflow", "close_issue", "label_issue", "post_discussion_comment", "create_issue", "comment_on_issue"],
    logger: { info: core.info, warning: core.warning, error: core.error, debug: core.debug },
  });

  // A session that reports no usage must not turn the total into NaN.
  const tokensIn = result.tokensIn ?? 0;
  const tokensOut = result.tokensOut ?? 0;
  const tokensUsed = tokensIn + tokensOut;

  // Build review table for logging
  const reviewTable = reviewResult.elements.map((e) => ({
    element: e.name,
    implemented: e.implemented ? "YES" : "NO",
    unitTested: e.unitTested ? "YES" : "NO",
    behaviourTested: e.behaviourTested ? "YES" : "NO",
    websiteUsed: e.websiteUsed ? "YES" : "NO",
    notes: e.notes || "",
  }));

  // Set outputs for downstream jobs (advice and gaps are truncated to bound output size)
  core.setOutput("completeness-advice", reviewResult.advice.substring(0, 500));
  core.setOutput("gaps", JSON.stringify(reviewResult.gaps.slice(0, 20)));
  core.setOutput("review-table", JSON.stringify(reviewTable));

  return {
    outcome: "implementation-reviewed",
    tokensUsed,
    inputTokens: tokensIn,
    outputTokens: tokensOut,
    cost: 0,
    model,
    narrative: result.narrative || reviewResult.advice,
    reviewTable,
    reviewGaps: reviewResult.gaps,
    completenessAdvice: reviewResult.advice,
    misleadingMetrics: reviewResult.misleadingMetrics,
    details: `Traced ${reviewResult.elements.length} element(s), found ${reviewResult.gaps.length} gap(s)`,
  };
}
|
|
@@ -106,12 +106,20 @@ async function postDirectReply(octokit, repo, nodeId, body) {
|
|
|
106
106
|
|
|
107
107
|
async function gatherContext(octokit, repo, config, t) {
|
|
108
108
|
const mission = readOptionalFile(config.paths.mission.path);
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
109
|
+
// Read recent activity from agent-log files
|
|
110
|
+
let recentActivity = "";
|
|
111
|
+
let cumulativeTransformationCost = 0;
|
|
112
|
+
try {
|
|
113
|
+
const { readdirSync } = await import("fs");
|
|
114
|
+
const logFiles = readdirSync(".").filter(f => f.startsWith("agent-log-") && f.endsWith(".md")).sort();
|
|
115
|
+
const recent = logFiles.slice(-5);
|
|
116
|
+
recentActivity = recent.map(f => readOptionalFile(f)).join("\n---\n").split("\n").slice(-40).join("\n");
|
|
117
|
+
for (const f of logFiles) {
|
|
118
|
+
const content = readOptionalFile(f);
|
|
119
|
+
const costMatches = content.matchAll(/\*\*agentic-lib transformation cost:\*\* (\d+)/g);
|
|
120
|
+
cumulativeTransformationCost += [...costMatches].reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
121
|
+
}
|
|
122
|
+
} catch { /* no agent-log files yet */ }
|
|
115
123
|
|
|
116
124
|
// Check mission-complete signal
|
|
117
125
|
const missionComplete = existsSync("MISSION_COMPLETE.md");
|
|
@@ -474,6 +482,26 @@ function buildPrompt(ctx, agentInstructions, config) {
|
|
|
474
482
|
`### Recent Activity`,
|
|
475
483
|
ctx.recentActivity || "none",
|
|
476
484
|
"",
|
|
485
|
+
...(process.env.REVIEW_ADVICE ? [
|
|
486
|
+
"### Implementation Review",
|
|
487
|
+
`**Completeness:** ${process.env.REVIEW_ADVICE}`,
|
|
488
|
+
...((() => {
|
|
489
|
+
try {
|
|
490
|
+
const gaps = JSON.parse(process.env.REVIEW_GAPS || "[]");
|
|
491
|
+
if (gaps.length > 0) {
|
|
492
|
+
return [
|
|
493
|
+
"",
|
|
494
|
+
"**Gaps Found:**",
|
|
495
|
+
...gaps.map((g) => `- [${g.severity}] ${g.element}: ${g.description} (${g.gapType})`),
|
|
496
|
+
"",
|
|
497
|
+
"Consider creating issues with label 'implementation-gap' for critical gaps.",
|
|
498
|
+
];
|
|
499
|
+
}
|
|
500
|
+
} catch { /* ignore */ }
|
|
501
|
+
return [];
|
|
502
|
+
})()),
|
|
503
|
+
"",
|
|
504
|
+
] : []),
|
|
477
505
|
"## Your Task",
|
|
478
506
|
"Use list_issues, list_prs, read_file, and search_discussions to explore the repository state.",
|
|
479
507
|
"Then call report_supervisor_plan with your chosen actions and reasoning.",
|
|
@@ -29,7 +29,8 @@ runs:
|
|
|
29
29
|
# Unstage workflow files — GITHUB_TOKEN cannot push workflow changes
|
|
30
30
|
git reset HEAD -- '.github/workflows/' 2>/dev/null || true
|
|
31
31
|
# Unstage log/screenshot files — these live on the agentic-lib-logs branch
|
|
32
|
-
git reset HEAD -- 'intentïon.md' '
|
|
32
|
+
git reset HEAD -- 'intentïon.md' 'SCREENSHOT_INDEX.png' 2>/dev/null || true
|
|
33
|
+
git reset HEAD -- agent-log-*.md 2>/dev/null || true
|
|
33
34
|
if git diff --cached --quiet; then
|
|
34
35
|
echo "No changes to commit"
|
|
35
36
|
fi
|
package/src/copilot/config.js
CHANGED
|
@@ -261,7 +261,7 @@ export function loadConfig(configPath) {
|
|
|
261
261
|
transformationBudget: tuning.transformationBudget,
|
|
262
262
|
seeding: toml.seeding || {},
|
|
263
263
|
intentionBot: {
|
|
264
|
-
|
|
264
|
+
logPrefix: bot["log-prefix"] || "agent-log-",
|
|
265
265
|
logBranch: bot["log-branch"] || "agentic-lib-logs",
|
|
266
266
|
screenshotFile: bot["screenshot-file"] || "SCREENSHOT_INDEX.png",
|
|
267
267
|
},
|
package/src/copilot/guards.js
CHANGED
|
@@ -6,10 +6,9 @@
|
|
|
6
6
|
// (transform.js, fix-code.js, maintain-features.js, maintain-library.js)
|
|
7
7
|
// before Phase 4 convergence replaced them with unconditional runCopilotSession().
|
|
8
8
|
|
|
9
|
-
import { existsSync } from "fs";
|
|
9
|
+
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
10
10
|
import { resolve } from "path";
|
|
11
11
|
import { execSync } from "child_process";
|
|
12
|
-
import { readCumulativeCost } from "./telemetry.js";
|
|
13
12
|
|
|
14
13
|
/**
|
|
15
14
|
* Task-to-guard mapping. Each task has an ordered list of guards.
|
|
@@ -71,9 +70,16 @@ export function checkGuards(taskName, config, workspacePath, { logger } = {}) {
|
|
|
71
70
|
case "budget-exhausted": {
|
|
72
71
|
const budget = config.transformationBudget || 0;
|
|
73
72
|
if (budget > 0) {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
// Read cumulative cost from agent-log files in workspace
|
|
74
|
+
let cumulativeCost = 0;
|
|
75
|
+
try {
|
|
76
|
+
const logFiles = readdirSync(wsPath).filter(f => f.startsWith("agent-log-") && f.endsWith(".md"));
|
|
77
|
+
for (const f of logFiles) {
|
|
78
|
+
const content = readFileSync(resolve(wsPath, f), "utf8");
|
|
79
|
+
const costMatches = content.matchAll(/\*\*agentic-lib transformation cost:\*\* (\d+)/g);
|
|
80
|
+
cumulativeCost += [...costMatches].reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
81
|
+
}
|
|
82
|
+
} catch { /* no agent-log files */ }
|
|
77
83
|
if (cumulativeCost >= budget) {
|
|
78
84
|
return { skip: true, reason: `Transformation budget exhausted (${cumulativeCost}/${budget})` };
|
|
79
85
|
}
|
package/src/copilot/telemetry.js
CHANGED
|
@@ -147,6 +147,35 @@ export function readCumulativeCost(intentionFilepath) {
|
|
|
147
147
|
return [...costMatches].reduce((sum, m) => sum + parseInt(m[1], 10), 0);
|
|
148
148
|
}
|
|
149
149
|
|
|
150
|
+
/**
 * Gather and parse all agent-log-*.md files from a directory.
 * Returns structured data from each log file for use in prompts and metrics.
 *
 * Files are processed in lexicographic filename order. A file that cannot be
 * read (for example a directory whose name matches the pattern) is skipped on
 * its own, so one bad entry does not discard the logs that could be read.
 * A missing directory, an empty `logsDir`, or a failed directory listing all
 * yield an empty array.
 *
 * @param {string} logsDir - Directory containing agent-log-*.md files
 * @returns {Array} Parsed log entries: { filename, task, outcome, advice, content }
 */
export function gatherAgentLogs(logsDir) {
  if (!logsDir || !existsSync(logsDir)) return [];

  let files;
  try {
    files = readdirSync(logsDir)
      .filter((f) => f.startsWith("agent-log-") && f.endsWith(".md"))
      .sort();
  } catch {
    // Directory vanished or is unreadable: nothing to report.
    return [];
  }

  const entries = [];
  for (const filename of files) {
    let content;
    try {
      content = readFileSync(join(logsDir, filename), "utf8");
    } catch {
      // Skip only this entry; the remaining logs are still useful.
      continue;
    }

    const taskMatch = content.match(/\*\*Task:\*\* (.+)/);
    const outcomeMatch = content.match(/\*\*Outcome:\*\* (.+)/);
    // The advice section runs until the next heading or the "---" footer rule.
    const adviceMatch = content.match(/## Completeness Assessment\n([\s\S]*?)(?=\n##|\n---)/);

    entries.push({
      filename,
      task: taskMatch ? taskMatch[1].trim() : "unknown",
      outcome: outcomeMatch ? outcomeMatch[1].trim() : "unknown",
      advice: adviceMatch ? adviceMatch[1].trim() : "",
      content,
    });
  }
  return entries;
}
|
|
178
|
+
|
|
150
179
|
/**
|
|
151
180
|
* Build limits status array for activity logging.
|
|
152
181
|
*
|