@xn-intenton-z2a/agentic-lib 7.2.5 → 7.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/.github/workflows/agentic-lib-init.yml +56 -0
  2. package/.github/workflows/agentic-lib-test.yml +7 -2
  3. package/.github/workflows/agentic-lib-workflow.yml +50 -3
  4. package/README.md +88 -17
  5. package/agentic-lib.toml +7 -0
  6. package/bin/agentic-lib.js +260 -496
  7. package/package.json +2 -1
  8. package/src/actions/agentic-step/config-loader.js +9 -0
  9. package/src/actions/agentic-step/index.js +104 -7
  10. package/src/actions/agentic-step/tasks/direct.js +435 -0
  11. package/src/actions/agentic-step/tasks/supervise.js +107 -180
  12. package/src/agents/agent-apply-fix.md +5 -2
  13. package/src/agents/agent-director.md +58 -0
  14. package/src/agents/agent-discovery.md +52 -0
  15. package/src/agents/agent-issue-resolution.md +18 -0
  16. package/src/agents/agent-iterate.md +45 -0
  17. package/src/agents/agent-supervisor.md +22 -50
  18. package/src/copilot/agents.js +39 -0
  19. package/src/copilot/config.js +308 -0
  20. package/src/copilot/context.js +318 -0
  21. package/src/copilot/hybrid-session.js +330 -0
  22. package/src/copilot/logger.js +43 -0
  23. package/src/copilot/sdk.js +36 -0
  24. package/src/copilot/session.js +372 -0
  25. package/src/copilot/tasks/fix-code.js +73 -0
  26. package/src/copilot/tasks/maintain-features.js +61 -0
  27. package/src/copilot/tasks/maintain-library.js +66 -0
  28. package/src/copilot/tasks/transform.js +120 -0
  29. package/src/copilot/tools.js +141 -0
  30. package/src/mcp/server.js +43 -25
  31. package/src/seeds/zero-README.md +31 -0
  32. package/src/seeds/zero-behaviour.test.js +12 -4
  33. package/src/seeds/zero-package.json +1 -1
  34. package/src/seeds/zero-playwright.config.js +1 -0
@@ -333,3 +333,59 @@ jobs:
333
333
  exit 1
334
334
  fi
335
335
  done
336
+
337
+ # W8: Create initial seed issues after purge so the pipeline has work to do
338
+ - name: Create initial seed issues
339
+ if: github.repository != 'xn-intenton-z2a/agentic-lib' && env.INIT_MODE == 'purge' && needs.params.outputs.dry-run != 'true'
340
+ uses: actions/github-script@v8
341
+ with:
342
+ script: |
343
+ const fs = require('fs');
344
+ const missionContent = fs.existsSync('MISSION.md')
345
+ ? fs.readFileSync('MISSION.md', 'utf8')
346
+ : '(no MISSION.md found)';
347
+
348
+ // Ensure labels exist
349
+ for (const label of ['automated', 'ready']) {
350
+ try {
351
+ await github.rest.issues.createLabel({
352
+ ...context.repo, name: label,
353
+ color: label === 'automated' ? '0e8a16' : '1d76db',
354
+ description: label === 'automated' ? 'Created by automation' : 'Ready for dev transform',
355
+ });
356
+ } catch (e) { /* label already exists */ }
357
+ }
358
+
359
+ // W8a: Initial unit tests issue
360
+ const unitTestBody = [
361
+ 'Create a unit test file for each of the major features in the mission ',
362
+ 'and put a TODO in a trivial empty passing test in each.',
363
+ '',
364
+ '## MISSION.md',
365
+ '',
366
+ missionContent,
367
+ ].join('\n');
368
+ const { data: issue1 } = await github.rest.issues.create({
369
+ ...context.repo,
370
+ title: 'Initial unit tests',
371
+ body: unitTestBody,
372
+ labels: ['automated', 'ready'],
373
+ });
374
+ core.info(`Created issue #${issue1.number}: Initial unit tests`);
375
+
376
+ // W8b: Initial web layout issue
377
+ const webLayoutBody = [
378
+ 'Create the home page layout to showcase each of the major features in the mission ',
379
+ 'and put a TODO in a trivial empty passing test in each.',
380
+ '',
381
+ '## MISSION.md',
382
+ '',
383
+ missionContent,
384
+ ].join('\n');
385
+ const { data: issue2 } = await github.rest.issues.create({
386
+ ...context.repo,
387
+ title: 'Initial web layout',
388
+ body: webLayoutBody,
389
+ labels: ['automated', 'ready'],
390
+ });
391
+ core.info(`Created issue #${issue2.number}: Initial web layout`);
@@ -90,8 +90,13 @@ jobs:
90
90
  - name: Install dependencies
91
91
  run: npm ci
92
92
 
93
- - name: Run behaviour tests
94
- run: npm run test:behaviour
93
+ - name: Run behaviour tests (with retry)
94
+ run: |
95
+ npm run test:behaviour || {
96
+ echo "::warning::Behaviour test attempt 1 failed — retrying"
97
+ sleep 2
98
+ npm run test:behaviour
99
+ }
95
100
  #env:
96
101
  # HOME: /root
97
102
 
@@ -621,14 +621,55 @@ jobs:
621
621
  commit-message: "agentic-step: maintain features and library"
622
622
  push-ref: ${{ github.ref_name }}
623
623
 
624
- # ─── Supervisor: LLM decides what to do (after maintain has features) ──
625
- supervisor:
626
- needs: [params, pr-cleanup, telemetry, maintain]
624
+ # ─── Director: LLM evaluates mission status (complete/failed/in-progress) ──
625
+ director:
626
+ needs: [params, telemetry, maintain]
627
627
  if: |
628
628
  !cancelled() &&
629
629
  (needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'dev-only') &&
630
630
  needs.params.result == 'success'
631
631
  runs-on: ubuntu-latest
632
+ outputs:
633
+ decision: ${{ steps.director.outputs.director-decision }}
634
+ analysis: ${{ steps.director.outputs.director-analysis }}
635
+ steps:
636
+ - uses: actions/checkout@v6
637
+
638
+ - uses: actions/setup-node@v6
639
+ with:
640
+ node-version: "24"
641
+
642
+ - name: Self-init (agentic-lib dev only)
643
+ if: hashFiles('scripts/self-init.sh') != '' && hashFiles('.github/agentic-lib/actions/agentic-step/package.json') == ''
644
+ run: bash scripts/self-init.sh
645
+
646
+ - name: Install agentic-step dependencies
647
+ working-directory: .github/agentic-lib/actions/agentic-step
648
+ run: npm ci
649
+
650
+ - name: Run director
651
+ id: director
652
+ if: github.repository != 'xn-intenton-z2a/agentic-lib'
653
+ uses: ./.github/agentic-lib/actions/agentic-step
654
+ env:
655
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
656
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
657
+ with:
658
+ task: "direct"
659
+ config: ${{ needs.params.outputs.config-path }}
660
+ instructions: ".github/agentic-lib/agents/agent-director.md"
661
+ model: ${{ needs.params.outputs.model }}
662
+
663
+ # ─── Supervisor: LLM decides what to do (after director evaluates) ──
664
+ supervisor:
665
+ needs: [params, pr-cleanup, telemetry, maintain, director]
666
+ if: |
667
+ !cancelled() &&
668
+ (needs.params.outputs.mode == 'full' || needs.params.outputs.mode == 'dev-only') &&
669
+ needs.params.result == 'success' &&
670
+ needs.director.outputs.decision != 'mission-complete' &&
671
+ needs.director.outputs.decision != 'mission-failed'
672
+ runs-on: ubuntu-latest
632
673
  steps:
633
674
  - uses: actions/checkout@v6
634
675
 
@@ -1175,6 +1216,12 @@ jobs:
1175
1216
  set +e
1176
1217
  npm run --if-present test:behaviour 2>&1 | tail -30
1177
1218
  EXIT_CODE=$?
1219
+ if [ $EXIT_CODE -ne 0 ]; then
1220
+ echo "::warning::Behaviour test attempt 1 failed — retrying"
1221
+ sleep 2
1222
+ npm run --if-present test:behaviour 2>&1 | tail -30
1223
+ EXIT_CODE=$?
1224
+ fi
1178
1225
  set -e
1179
1226
  if [ $EXIT_CODE -ne 0 ]; then
1180
1227
  echo "tests-passed=false" >> $GITHUB_OUTPUT
package/README.md CHANGED
@@ -73,7 +73,7 @@ your-repo/
73
73
  │ │
74
74
  │ └── agentic-lib/ # [INIT] Internal infrastructure (always overwritten)
75
75
  │ ├── actions/
76
- │ │ ├── agentic-step/ # The Copilot SDK action (9 task handlers)
76
+ │ │ ├── agentic-step/ # The Copilot SDK action (10 task handlers)
77
77
  │ │ ├── commit-if-changed/ # Composite: conditional git commit
78
78
  │ │ └── setup-npmrc/ # Composite: npm registry auth
79
79
  │ ├── agents/ # 8 prompt files + config YAML
@@ -212,6 +212,7 @@ The core of the system is a single GitHub Action that handles all autonomous tas
212
212
  | Task | Purpose |
213
213
  |------|---------|
214
214
  | `supervise` | Gather repo context, choose and dispatch actions strategically |
215
+ | `direct` | Evaluate mission status: complete, failed, or gap analysis |
215
216
  | `transform` | Transform the codebase toward the mission |
216
217
  | `resolve-issue` | Read an issue and generate code to resolve it |
217
218
  | `fix-code` | Fix failing tests or lint errors |
@@ -242,9 +243,8 @@ All task commands accept these flags:
242
243
  | `--dry-run` | off | Show the prompt without calling the Copilot SDK |
243
244
  | `--target <path>` | current directory | Target repository to transform |
244
245
  | `--model <name>` | `claude-sonnet-4` | Copilot SDK model |
245
- | `--cycles <N>` | from budget | Max iteration cycles (iterate only) |
246
- | `--steps <list>` | all three | Comma-separated steps per cycle (iterate only) |
247
246
  | `--mission <name>` | hamming-distance | Init with --purge before iterating (iterate only) |
247
+ | `--timeout <ms>` | 600000 | Session timeout in milliseconds (iterate only) |
248
248
 
249
249
  ### Example: Full Walkthrough
250
250
 
@@ -299,25 +299,96 @@ npx @xn-intenton-z2a/agentic-lib transform --dry-run
299
299
 
300
300
  ### Iterator
301
301
 
302
- The `iterate` command runs multiple cycles of maintain → transform → fix with automatic stop conditions and budget tracking:
302
+ The `iterate` command runs a single persistent Copilot SDK session that autonomously implements your mission — reading code, writing implementations and tests, running tests, and iterating until everything passes.
303
303
 
304
304
  ```bash
305
- # Init a mission and iterate with default budget
306
- npx @xn-intenton-z2a/agentic-lib iterate --mission fizz-buzz --model gpt-5-mini
305
+ # Init a mission and iterate
306
+ npx @xn-intenton-z2a/agentic-lib iterate --mission hamming-distance --model gpt-5-mini
307
307
 
308
- # Run 4 cycles on an existing workspace
309
- npx @xn-intenton-z2a/agentic-lib iterate --cycles 4
308
+ # Iterate on an existing workspace
309
+ npx @xn-intenton-z2a/agentic-lib iterate --target /path/to/workspace
310
310
 
311
- # Transform-only cycles (skip maintain)
312
- npx @xn-intenton-z2a/agentic-lib iterate --steps transform,fix-code --cycles 3
311
+ # With an explicit timeout (600000 ms = 10 minutes, which is also the default)
312
+ npx @xn-intenton-z2a/agentic-lib iterate --mission fizz-buzz --timeout 600000
313
313
  ```
314
314
 
315
- **Stop conditions:**
316
- - Tests pass for 2 consecutive cycles
317
- - No files change for 2 consecutive cycles
318
- - Transformation budget exhausted (configurable via `transformation-budget` in `agentic-lib.toml`)
315
+ The session uses SDK hooks for observability (tool call tracking, error recovery) and infinite sessions for context management. The agent drives its own read-write-test loop until the mission is complete or the timeout is reached.
319
316
 
320
- Each cycle logs `**agentic-lib transformation cost:** 1` to `intentïon.md` when source files change. The iterator reads these to track cumulative cost against the budget.
317
+ **Available missions:** hamming-distance, fizz-buzz, roman-numerals, string-utils, cron-engine, dense-encoding, markdown-compiler, and more (see `src/seeds/missions/`).
318
+
319
+ ### Running Local Benchmarks
320
+
321
+ You can benchmark mission completion locally without GitHub Actions. This is useful for comparing models, tuning profiles, and measuring iteration speed.
322
+
323
+ **Prerequisites:**
324
+
325
+ 1. A `COPILOT_GITHUB_TOKEN` (fine-grained PAT with Copilot read permission)
326
+ 2. Node.js 24+
327
+
328
+ **Setup:**
329
+
330
+ ```bash
331
+ # Set your token
332
+ export COPILOT_GITHUB_TOKEN=github_pat_...
333
+
334
+ # Or source from .env (the file must `export` the variable so that npx inherits it)
335
+ source .env
336
+ ```
337
+
338
+ **Run a benchmark:**
339
+
340
+ ```bash
341
+ # Quick: hamming-distance with gpt-5-mini (simplest mission, ~1-2 min)
342
+ npx @xn-intenton-z2a/agentic-lib iterate \
343
+ --mission hamming-distance --model gpt-5-mini --timeout 300000
344
+
345
+ # Medium: roman-numerals with claude-sonnet-4
346
+ npx @xn-intenton-z2a/agentic-lib iterate \
347
+ --mission roman-numerals --model claude-sonnet-4
348
+
349
+ # Complex: string-utils with gpt-4.1 (10 functions, longer timeout)
350
+ npx @xn-intenton-z2a/agentic-lib iterate \
351
+ --mission string-utils --model gpt-4.1 --timeout 600000
352
+ ```
353
+
354
+ **From a local clone** (development):
355
+
356
+ ```bash
357
+ # From the agentic-lib directory
358
+ npx . iterate --mission hamming-distance --model gpt-5-mini --target /tmp/bench
359
+
360
+ # Or link globally
361
+ npm link
362
+ agentic-lib iterate --mission hamming-distance --model gpt-5-mini --target /tmp/bench
363
+ ```
364
+
365
+ **Output:**
366
+
367
+ ```
368
+ === agentic-lib iterate ===
369
+ Target: /tmp/bench
370
+ Model: gpt-5-mini
371
+
372
+ [hybrid] Creating session (model=gpt-5-mini, workspace=/tmp/bench)
373
+ [hybrid] Session: sess_abc123
374
+ [tool] read_file
375
+ [tool] read_file
376
+ [tool] write_file
377
+ [tool] run_tests
378
+ [tool] write_file
379
+ [tool] run_tests
380
+
381
+ === Results ===
382
+ Success: true
383
+ Tests passed: true
384
+ Session time: 47s
385
+ Total time: 52s
386
+ Tool calls: 6
387
+ Test runs: 2
388
+ Files written: 2
389
+ Tokens: 12400 (in=9200 out=3200)
390
+ End reason: complete
391
+ ```
321
392
 
322
393
  ### Environment
323
394
 
@@ -367,14 +438,14 @@ This repository is the source for the `@xn-intenton-z2a/agentic-lib` npm package
367
438
  src/
368
439
  ├── workflows/ 8 GitHub Actions workflow templates
369
440
  ├── actions/ 3 composite/SDK actions (agentic-step, commit-if-changed, setup-npmrc)
370
- ├── agents/ 8 agent prompt files + 1 config
441
+ ├── agents/ 9 agent prompt files + 1 config
371
442
  ├── seeds/ 7 seed files (test.yml + 6 project seed files for --purge reset)
372
443
  └── scripts/ 7 utility scripts distributed to consumers
373
444
  ```
374
445
 
375
446
  ### Testing
376
447
 
377
- 393 unit tests across 26 test files, plus system tests:
448
+ 431 unit tests across 27 test files, plus system tests:
378
449
 
379
450
  ```bash
380
451
  npm test # Run all tests (vitest)
package/agentic-lib.toml CHANGED
@@ -130,5 +130,12 @@ max-attempts-per-issue = 4 # max transform attempts before aband
130
130
  features-limit = 8 # max feature files in features/ directory
131
131
  library-limit = 64 # max library entries in library/ directory
132
132
 
133
+ [mission-complete]
134
+ # Thresholds for deterministic mission-complete declaration.
135
+ # All conditions must be met simultaneously.
136
+ min-resolved-issues = 3 # minimum closed-as-RESOLVED issues since init
137
+ require-dedicated-tests = true # require test files that import from src/lib/
138
+ max-source-todos = 0 # max TODO comments allowed in ./src (0 = none)
139
+
133
140
  [bot]
134
141
  log-file = "test/intentïon.md" #@dist "intentïon.md"