@xn-intenton-z2a/agentic-lib 7.4.30 → 7.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,6 +40,10 @@ on:
40
40
  type: string
41
41
  required: false
42
42
  default: "false"
43
+ log-branch:
44
+ type: string
45
+ required: false
46
+ default: "agentic-lib-logs"
43
47
  ref:
44
48
  type: string
45
49
  required: false
@@ -809,6 +809,21 @@ jobs:
809
809
  instructions: ".github/agents/agent-implementation-review.md"
810
810
  model: ${{ needs.params.outputs.model }}
811
811
 
812
+ # Commit MISSION.md checkbox updates (from Fix #6) so downstream jobs see them
813
+ - name: Commit acceptance criteria updates
814
+ if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
815
+ run: |
816
+ if git diff --quiet MISSION.md 2>/dev/null; then
817
+ echo "No MISSION.md changes to commit"
818
+ else
819
+ git config user.name "github-actions[bot]"
820
+ git config user.email "github-actions[bot]@users.noreply.github.com"
821
+ git add MISSION.md
822
+ git commit -m "agentic-step: update acceptance criteria checkboxes [skip ci]"
823
+ git pull --rebase origin main 2>/dev/null || true
824
+ git push origin HEAD:main || echo "::warning::Could not push MISSION.md updates"
825
+ fi
826
+
812
827
  - name: Push log to log branch
813
828
  if: github.repository != 'xn-intenton-z2a/agentic-lib' && needs.params.outputs.dry-run != 'true'
814
829
  env:
@@ -1357,12 +1372,14 @@ jobs:
1357
1372
  # ─── Review: close resolved issues, enhance with criteria ──────────
1358
1373
  # W15: Skip review when there are no open issues to review
1359
1374
  review-features:
1360
- needs: [params, maintain, supervisor, telemetry]
1375
+ needs: [params, maintain, supervisor, telemetry, director]
1361
1376
  if: |
1362
1377
  !cancelled() &&
1363
1378
  (needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'review-only') &&
1364
1379
  needs.params.result == 'success' &&
1365
- needs.telemetry.outputs.open-issue-count != '0'
1380
+ needs.telemetry.outputs.open-issue-count != '0' &&
1381
+ needs.director.outputs.decision != 'mission-complete' &&
1382
+ needs.director.outputs.decision != 'mission-failed'
1366
1383
  runs-on: ubuntu-latest
1367
1384
  steps:
1368
1385
  - uses: actions/checkout@v6
@@ -1716,7 +1733,7 @@ jobs:
1716
1733
 
1717
1734
  # ─── Post-merge: stats, schedule, mission check ────────────────────
1718
1735
  post-merge:
1719
- needs: [params, maintain, dev, pr-cleanup, implementation-review]
1736
+ needs: [params, maintain, dev, pr-cleanup, implementation-review, director]
1720
1737
  if: ${{ !cancelled() && needs.params.result == 'success' }}
1721
1738
  runs-on: ubuntu-latest
1722
1739
  steps:
@@ -1739,10 +1756,22 @@ jobs:
1739
1756
  echo "- Website: [${SITE_URL}](${SITE_URL})" >> $GITHUB_STEP_SUMMARY
1740
1757
 
1741
1758
  # W14: Post-merge director check — re-evaluate mission status after dev/PR merges
1759
+ # Pull latest main to include dev merge (checkout was at maintain commit)
1760
+ - name: Pull latest main (include dev merge)
1761
+ if: |
1762
+ needs.params.outputs.mission-complete != 'true' &&
1763
+ needs.params.outputs.dry-run != 'true' &&
1764
+ needs.director.outputs.decision != 'mission-complete' &&
1765
+ needs.director.outputs.decision != 'mission-failed' &&
1766
+ github.repository != 'xn-intenton-z2a/agentic-lib'
1767
+ run: git pull origin main --ff-only 2>/dev/null || true
1768
+
1742
1769
  - name: Fetch log and screenshot from log branch (post-merge director)
1743
1770
  if: |
1744
1771
  needs.params.outputs.mission-complete != 'true' &&
1745
1772
  needs.params.outputs.dry-run != 'true' &&
1773
+ needs.director.outputs.decision != 'mission-complete' &&
1774
+ needs.director.outputs.decision != 'mission-failed' &&
1746
1775
  github.repository != 'xn-intenton-z2a/agentic-lib'
1747
1776
  env:
1748
1777
  LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
@@ -1756,6 +1785,8 @@ jobs:
1756
1785
  if: |
1757
1786
  needs.params.outputs.mission-complete != 'true' &&
1758
1787
  needs.params.outputs.dry-run != 'true' &&
1788
+ needs.director.outputs.decision != 'mission-complete' &&
1789
+ needs.director.outputs.decision != 'mission-failed' &&
1759
1790
  github.repository != 'xn-intenton-z2a/agentic-lib'
1760
1791
  with:
1761
1792
  node-version: "24"
@@ -1764,6 +1795,8 @@ jobs:
1764
1795
  if: |
1765
1796
  needs.params.outputs.mission-complete != 'true' &&
1766
1797
  needs.params.outputs.dry-run != 'true' &&
1798
+ needs.director.outputs.decision != 'mission-complete' &&
1799
+ needs.director.outputs.decision != 'mission-failed' &&
1767
1800
  hashFiles('scripts/self-init.sh') != '' && hashFiles('.github/agentic-lib/actions/agentic-step/package.json') == ''
1768
1801
  run: bash scripts/self-init.sh
1769
1802
 
@@ -1771,6 +1804,8 @@ jobs:
1771
1804
  if: |
1772
1805
  needs.params.outputs.mission-complete != 'true' &&
1773
1806
  needs.params.outputs.dry-run != 'true' &&
1807
+ needs.director.outputs.decision != 'mission-complete' &&
1808
+ needs.director.outputs.decision != 'mission-failed' &&
1774
1809
  github.repository != 'xn-intenton-z2a/agentic-lib'
1775
1810
  working-directory: .github/agentic-lib/actions/agentic-step
1776
1811
  run: |
@@ -1784,6 +1819,8 @@ jobs:
1784
1819
  if: |
1785
1820
  needs.params.outputs.mission-complete != 'true' &&
1786
1821
  needs.params.outputs.dry-run != 'true' &&
1822
+ needs.director.outputs.decision != 'mission-complete' &&
1823
+ needs.director.outputs.decision != 'mission-failed' &&
1787
1824
  github.repository != 'xn-intenton-z2a/agentic-lib'
1788
1825
  timeout-minutes: 10
1789
1826
  uses: ./.github/agentic-lib/actions/agentic-step
@@ -1802,6 +1839,8 @@ jobs:
1802
1839
  if: |
1803
1840
  needs.params.outputs.mission-complete != 'true' &&
1804
1841
  needs.params.outputs.dry-run != 'true' &&
1842
+ needs.director.outputs.decision != 'mission-complete' &&
1843
+ needs.director.outputs.decision != 'mission-failed' &&
1805
1844
  github.repository != 'xn-intenton-z2a/agentic-lib'
1806
1845
  env:
1807
1846
  LOG_BRANCH: ${{ needs.params.outputs.log-branch }}
@@ -1817,6 +1856,8 @@ jobs:
1817
1856
  uses: ./.github/workflows/agentic-lib-test.yml
1818
1857
  with:
1819
1858
  ref: ${{ needs.maintain.outputs.commit-sha || github.ref }}
1859
+ push-screenshot: "true"
1860
+ log-branch: ${{ needs.params.outputs.log-branch }}
1820
1861
  secrets: inherit
1821
1862
 
1822
1863
  # ─── Schedule change (if requested) ────────────────────────────────
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xn-intenton-z2a/agentic-lib",
3
- "version": "7.4.30",
3
+ "version": "7.4.32",
4
4
  "description": "Agentic-lib Agentic Coding Systems SDK powering automated GitHub workflows.",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -65,7 +65,7 @@ function detectDedicatedTests() {
65
65
  * Build the metric-based mission-complete advisory string.
66
66
  * This is the mechanical check — purely rule-based, no LLM.
67
67
  */
68
- function buildMetricAssessment(ctx, config) {
68
+ async function buildMetricAssessment(ctx, config) {
69
69
  const thresholds = config.missionCompleteThresholds || {};
70
70
  const minResolved = thresholds.minResolvedIssues ?? 3;
71
71
  const maxTodos = thresholds.maxSourceTodos ?? 0;
@@ -78,6 +78,12 @@ function buildMetricAssessment(ctx, config) {
78
78
  } catch { /* ignore parse errors */ }
79
79
  const criticalGaps = reviewGaps.filter((g) => g.severity === "critical");
80
80
 
81
+ // Acceptance criteria from MISSION.md checkboxes
82
+ const { countAcceptanceCriteria } = await import("../../../copilot/telemetry.js");
83
+ const missionPath = config.paths?.mission?.path || "MISSION.md";
84
+ const acceptance = countAcceptanceCriteria(missionPath);
85
+ const acceptanceMet = acceptance.total > 0 && acceptance.met > acceptance.total / 2;
86
+
81
87
  // C6: Removed "Dedicated tests" metric; using cumulative transforms instead
82
88
  const metrics = [
83
89
  { metric: "Open issues", value: ctx.issuesSummary.length, target: 0, met: ctx.issuesSummary.length === 0 },
@@ -87,6 +93,7 @@ function buildMetricAssessment(ctx, config) {
87
93
  { metric: "Cumulative transforms", value: ctx.cumulativeTransformationCost, target: 1, met: ctx.cumulativeTransformationCost >= 1 },
88
94
  { metric: "Budget", value: ctx.cumulativeTransformationCost, target: ctx.transformationBudget || "unlimited", met: !(ctx.transformationBudget > 0 && ctx.cumulativeTransformationCost >= ctx.transformationBudget) },
89
95
  { metric: "Implementation review", value: criticalGaps.length === 0 ? "No critical gaps" : `${criticalGaps.length} critical gap(s)`, target: "No critical gaps", met: criticalGaps.length === 0 },
96
+ { metric: "Acceptance criteria", value: acceptance.total > 0 ? `${acceptance.met}/${acceptance.total}` : "N/A", target: "> 50%", met: acceptanceMet },
90
97
  ];
91
98
 
92
99
  const allMet = metrics.every((m) => m.met);
@@ -158,6 +165,9 @@ function buildPrompt(ctx, agentInstructions, metricAssessment) {
158
165
  "Consider the implementation review findings — if critical gaps exist, do NOT declare mission-complete.",
159
166
  "Check the acceptance criteria in the Mission section above. If all criteria are clearly satisfied by the current source code and tests (verified via read_file), you SHOULD declare mission-complete even if not all mechanical metrics are MET.",
160
167
  "For simple missions (few functions, clear acceptance criteria), do not require elaborate test coverage or documentation beyond what the acceptance criteria specify.",
168
+ "",
169
+ "**Post-merge evaluation context:** This director runs AFTER a dev transformation has been merged. The source code, tests, README, and website you see are the result of that merge. The acceptance criteria checkboxes in MISSION.md reflect the implementation review's findings. If the metrics show all conditions MET and the acceptance criteria are > 50% checked, you should declare mission-complete unless you find a critical implementation gap via read_file. Do not defer to a future run — the pipeline has a structural 2-run minimum, and this is your chance to complete in 1 run.",
170
+ "",
161
171
  "Then call report_director_decision with your determination.",
162
172
  "",
163
173
  "**You MUST call report_director_decision exactly once.**",
@@ -485,7 +495,7 @@ export async function direct(context) {
485
495
  };
486
496
 
487
497
  // Build metric-based advisory
488
- const metricAssessment = buildMetricAssessment(ctx, config);
498
+ const metricAssessment = await buildMetricAssessment(ctx, config);
489
499
  core.info(`Metric assessment: ${metricAssessment.assessment}`);
490
500
 
491
501
  // --- LLM decision via hybrid session ---
@@ -48,7 +48,10 @@ function buildReviewPrompt(mission, config, agentInstructions, agentLogsSummary)
48
48
  " - Tests that don't assert anything meaningful (empty/trivial)",
49
49
  " - Features listed as done in docs but missing from code",
50
50
  " - PRs merged without test coverage for the claimed feature",
51
- "4. Call report_implementation_review with your findings.",
51
+ "4. Check the MISSION.md Acceptance Criteria checkboxes (`- [ ]`). For each criterion,",
52
+ " if you verified it is implemented AND unit-tested, include its exact text in the",
53
+ " `acceptanceCriteriaMet` array. Copy the criterion text exactly as it appears after `- [ ]`.",
54
+ "5. Call report_implementation_review with your findings.",
52
55
  "",
53
56
  "**You MUST call report_implementation_review exactly once.**",
54
57
  ].join("\n");
@@ -166,15 +169,47 @@ export async function implementationReview(context) {
166
169
  },
167
170
  description: "Metrics that may be misleading about actual progress",
168
171
  },
172
+ acceptanceCriteriaMet: {
173
+ type: "array",
174
+ items: { type: "string" },
175
+ description: "Exact text of each acceptance criterion from MISSION.md that is verified as implemented AND unit-tested. Copy the text after '- [ ]' exactly.",
176
+ },
169
177
  },
170
178
  required: ["elements", "gaps", "advice"],
171
179
  },
172
- handler: async ({ elements, gaps, advice, misleadingMetrics }) => {
180
+ handler: async ({ elements, gaps, advice, misleadingMetrics, acceptanceCriteriaMet }) => {
173
181
  reviewResult.elements = elements || [];
174
182
  reviewResult.gaps = gaps || [];
175
183
  reviewResult.advice = advice || "";
176
184
  reviewResult.misleadingMetrics = misleadingMetrics || [];
177
- return { textResultForLlm: `Review recorded: ${elements?.length || 0} elements traced, ${gaps?.length || 0} gaps found` };
185
+
186
+ // Update MISSION.md checkboxes based on verified acceptance criteria
187
+ const metCriteria = acceptanceCriteriaMet || [];
188
+ if (metCriteria.length > 0) {
189
+ try {
190
+ const missionPath = config.paths?.mission?.path || "MISSION.md";
191
+ const { readFileSync, writeFileSync } = await import("fs");
192
+ let missionContent = readFileSync(missionPath, "utf8");
193
+ let checkedCount = 0;
194
+ for (const criterionText of metCriteria) {
195
+ // Match the checkbox line containing this criterion text (fuzzy: trim whitespace)
196
+ const escaped = criterionText.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").trim();
197
+ const re = new RegExp(`- \\[ \\] ${escaped}`);
198
+ if (re.test(missionContent)) {
199
+ missionContent = missionContent.replace(re, `- [x] ${criterionText.trim()}`);
200
+ checkedCount++;
201
+ }
202
+ }
203
+ if (checkedCount > 0) {
204
+ writeFileSync(missionPath, missionContent, "utf8");
205
+ core.info(`Updated ${checkedCount} acceptance criteria checkboxes in ${missionPath}`);
206
+ }
207
+ } catch (err) {
208
+ core.warning(`Could not update MISSION.md checkboxes: ${err.message}`);
209
+ }
210
+ }
211
+
212
+ return { textResultForLlm: `Review recorded: ${elements?.length || 0} elements traced, ${gaps?.length || 0} gaps found, ${metCriteria.length} criteria checked` };
178
213
  },
179
214
  });
180
215
 
@@ -79,11 +79,84 @@ else
79
79
 
80
80
  for attempt in $(seq 1 $MAX_RETRIES); do
81
81
  git push origin "${BRANCH}" && break
82
- echo "push-to-logs: push failed (attempt $attempt) — pulling and retrying"
82
+ echo "push-to-logs: push failed (attempt $attempt) — fetching latest and retrying"
83
+
84
+ # Fetch the latest remote state before rebasing
85
+ git fetch origin "${BRANCH}" 2>/dev/null || true
86
+
87
+ # Save our file contents before rebase (they may be lost on conflict)
88
+ for f in "${FILES[@]}"; do
89
+ cp "$f" "${TMPDIR}/ours-$(basename "$f")" 2>/dev/null || true
90
+ done
91
+
83
92
  git pull --rebase origin "${BRANCH}" || {
84
- echo "push-to-logs: rebase conflict — aborting and retrying"
85
- git rebase --abort 2>/dev/null || true
93
+ echo "push-to-logs: rebase conflict — resolving state file with merge strategy"
94
+
95
+ # For agentic-lib-state.toml conflicts, merge booleans (prefer true) and take max counters
96
+ STATE_FILE="agentic-lib-state.toml"
97
+ if git diff --name-only --diff-filter=U 2>/dev/null | grep -q "$STATE_FILE"; then
98
+ # Get the remote (theirs) version from the rebase base
99
+ git show "REBASE_HEAD:${STATE_FILE}" > "${TMPDIR}/ours-${STATE_FILE}" 2>/dev/null || true
100
+ git checkout --theirs "$STATE_FILE" 2>/dev/null || true
101
+
102
+ # Merge: for each boolean in our version that is true, set it true in theirs
103
+ if [ -f "${TMPDIR}/ours-${STATE_FILE}" ]; then
104
+ # Extract true booleans from our version and apply them
105
+ while IFS='=' read -r key val; do
106
+ key=$(echo "$key" | xargs)
107
+ val=$(echo "$val" | xargs)
108
+ if [ "$val" = "true" ]; then
109
+ # Set this key to true in the resolved file (theirs)
110
+ if grep -q "^${key} = " "$STATE_FILE" 2>/dev/null; then
111
+ sed -i "s/^${key} = .*/${key} = true/" "$STATE_FILE" 2>/dev/null || \
112
+ sed -i'' "s/^${key} = .*/${key} = true/" "$STATE_FILE" 2>/dev/null || true
113
+ fi
114
+ fi
115
+ done < "${TMPDIR}/ours-${STATE_FILE}"
116
+ fi
117
+
118
+ git add "$STATE_FILE"
119
+ fi
120
+
121
+ # Resolve any other conflicting files by taking ours (our log files are authoritative)
122
+ for f in "${FILES[@]}"; do
123
+ if git diff --name-only --diff-filter=U 2>/dev/null | grep -q "$(basename "$f")"; then
124
+ cp "${TMPDIR}/ours-$(basename "$f")" "$f" 2>/dev/null || true
125
+ git add "$f"
126
+ fi
127
+ done
128
+
129
+ # Continue the rebase if there are resolved conflicts
130
+ git rebase --continue 2>/dev/null || {
131
+ echo "push-to-logs: rebase continue failed — aborting"
132
+ git rebase --abort 2>/dev/null || true
133
+ }
86
134
  }
135
+
136
+ # W3b: After successful rebase (no conflict), re-apply our boolean true values.
137
+ # The rebase may have replayed our commit on top of a remote state with false values,
138
+ # causing our true values to be lost. Re-apply them from the saved copy.
139
+ STATE_FILE="agentic-lib-state.toml"
140
+ if [ -f "${TMPDIR}/ours-${STATE_FILE}" ] && [ -f "$STATE_FILE" ]; then
141
+ NEEDS_AMEND=false
142
+ while IFS='=' read -r key val; do
143
+ key=$(echo "$key" | xargs)
144
+ val=$(echo "$val" | xargs)
145
+ if [ "$val" = "true" ]; then
146
+ CURRENT=$(grep "^${key} = " "$STATE_FILE" 2>/dev/null | sed 's/.*= *//' | xargs || true)
147
+ if [ "$CURRENT" != "true" ]; then
148
+ sed -i "s/^${key} = .*/${key} = true/" "$STATE_FILE" 2>/dev/null || \
149
+ sed -i'' "s/^${key} = .*/${key} = true/" "$STATE_FILE" 2>/dev/null || true
150
+ NEEDS_AMEND=true
151
+ fi
152
+ fi
153
+ done < "${TMPDIR}/ours-${STATE_FILE}"
154
+ if [ "$NEEDS_AMEND" = "true" ]; then
155
+ echo "push-to-logs: re-applied boolean true values after rebase"
156
+ git add "$STATE_FILE"
157
+ git commit --amend --no-edit 2>/dev/null || true
158
+ fi
159
+ fi
87
160
  sleep $((attempt * 2))
88
161
  if [ "$attempt" -eq "$MAX_RETRIES" ]; then
89
162
  echo "::warning::push-to-logs: failed to push after $MAX_RETRIES attempts"
@@ -17,7 +17,7 @@
17
17
  "author": "",
18
18
  "license": "MIT",
19
19
  "dependencies": {
20
- "@xn-intenton-z2a/agentic-lib": "^7.4.30"
20
+ "@xn-intenton-z2a/agentic-lib": "^7.4.32"
21
21
  },
22
22
  "devDependencies": {
23
23
  "@playwright/test": "^1.58.0",