jfl 0.9.9 → 0.9.11

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (54):
  1. package/dist/commands/init-from-service.d.ts.map +1 -1
  2. package/dist/commands/init-from-service.js +2 -2
  3. package/dist/commands/init-from-service.js.map +1 -1
  4. package/dist/commands/init.d.ts.map +1 -1
  5. package/dist/commands/init.js +88 -23
  6. package/dist/commands/init.js.map +1 -1
  7. package/dist/commands/peter.d.ts.map +1 -1
  8. package/dist/commands/peter.js +112 -35
  9. package/dist/commands/peter.js.map +1 -1
  10. package/dist/commands/repair.d.ts.map +1 -1
  11. package/dist/commands/repair.js +13 -11
  12. package/dist/commands/repair.js.map +1 -1
  13. package/dist/commands/session.d.ts.map +1 -1
  14. package/dist/commands/session.js +7 -40
  15. package/dist/commands/session.js.map +1 -1
  16. package/dist/commands/start.js +3 -3
  17. package/dist/commands/start.js.map +1 -1
  18. package/dist/lib/agent-config.d.ts +1 -0
  19. package/dist/lib/agent-config.d.ts.map +1 -1
  20. package/dist/lib/agent-config.js.map +1 -1
  21. package/dist/lib/agent-guards.d.ts +67 -0
  22. package/dist/lib/agent-guards.d.ts.map +1 -0
  23. package/dist/lib/agent-guards.js +229 -0
  24. package/dist/lib/agent-guards.js.map +1 -0
  25. package/dist/lib/agent-runtime-api.d.ts +32 -0
  26. package/dist/lib/agent-runtime-api.d.ts.map +1 -0
  27. package/dist/lib/agent-runtime-api.js +270 -0
  28. package/dist/lib/agent-runtime-api.js.map +1 -0
  29. package/dist/lib/agent-session.d.ts.map +1 -1
  30. package/dist/lib/agent-session.js +255 -25
  31. package/dist/lib/agent-session.js.map +1 -1
  32. package/dist/lib/gtm-generator.js +3 -1
  33. package/dist/lib/gtm-generator.js.map +1 -1
  34. package/dist/lib/memory-search.d.ts.map +1 -1
  35. package/dist/lib/memory-search.js +0 -8
  36. package/dist/lib/memory-search.js.map +1 -1
  37. package/dist/utils/jfl-paths.d.ts +9 -0
  38. package/dist/utils/jfl-paths.d.ts.map +1 -1
  39. package/dist/utils/jfl-paths.js +13 -0
  40. package/dist/utils/jfl-paths.js.map +1 -1
  41. package/package.json +1 -1
  42. package/packages/pi/dist/index.d.ts.map +1 -1
  43. package/packages/pi/dist/index.js +19 -1
  44. package/packages/pi/dist/index.js.map +1 -1
  45. package/packages/pi/dist/session.d.ts +5 -1
  46. package/packages/pi/dist/session.d.ts.map +1 -1
  47. package/packages/pi/dist/session.js +247 -116
  48. package/packages/pi/dist/session.js.map +1 -1
  49. package/packages/pi/extensions/index.ts +24 -1
  50. package/packages/pi/extensions/session.ts +256 -96
  51. package/packages/pi/skills/end/SKILL.md +8 -0
  52. package/scripts/session/session-cleanup.sh +19 -6
  53. package/template/.github/workflows/jfl-eval.yml +8 -1
  54. package/template/scripts/session/session-cleanup.sh +23 -8
@@ -324,22 +324,14 @@ async function shutdownPiRuntime(state) {
324
324
  }
325
325
  }
326
326
  /**
327
- * Run an agent task using pi in non-interactive mode.
328
- *
329
- * Pi automatically picks up project context:
330
- * - AGENTS.md / CLAUDE.md — project instructions, skills, commands
331
- * - JFL custom tools — jfl_context, jfl_memory_search, jfl_eval_status, etc.
332
- * - Skills — eval, context, react-best-practices, etc.
333
- *
334
- * No lock files, no session management — just runs the task and exits.
327
+ * Run an agent task using claude CLI directly (~50MB vs ~500MB for Pi).
328
+ * Karpathy pattern: all context in the prompt, agent reads/edits/exits.
329
+ * Use JFL_AGENT_USE_PI=1 to force Pi runtime for debugging.
335
330
  */
336
331
  async function runAgentWithPi(projectRoot, task) {
337
- // Minimal append — pi already loads AGENTS.md and project context.
338
- // Just add PP-specific guardrails.
339
- const appendPrompt = [
340
- "You are Peter Parker (PP), an autonomous agent in the JFL self-driving loop.",
341
- "You are fixing an issue that was auto-picked from the backlog.",
342
- "Rules:",
332
+ const ppGuardrails = [
333
+ "",
334
+ "RULES:",
343
335
  "- Make minimal, focused changes. Don't refactor unrelated code.",
344
336
  "- Run the build (npm run build) and check for type errors before finishing.",
345
337
  "- If tests exist for the area you're changing, run them.",
@@ -347,29 +339,76 @@ async function runAgentWithPi(projectRoot, task) {
347
339
  "- Do NOT modify files outside the scope of the task.",
348
340
  "- When done, stop immediately. Do not ask for confirmation.",
349
341
  ].join("\n");
350
- console.log(chalk.cyan(" PP: Using pi agent (headless, with full JFL context)"));
351
- await new Promise((resolve) => {
352
- const env = { ...process.env };
353
- // Prevent pi from detecting it's inside another agent
354
- delete env.CLAUDECODE;
355
- delete env.CLAUDE_CODE;
356
- const child = spawn("pi", [
357
- "--print", task,
358
- "--append-system-prompt", appendPrompt,
359
- "--no-session", // Ephemeral — no lock files, no session state
360
- ], {
361
- cwd: projectRoot,
362
- stdio: "inherit",
363
- env,
364
- });
365
- child.on("error", (error) => {
366
- console.error(chalk.red(` PP: Failed to start pi: ${error.message}`));
367
- resolve();
342
+ const fullTask = task + ppGuardrails;
343
+ // API-based runtime: zero memory churn. No process spawning.
344
+ // Falls back to claude CLI spawn only if ANTHROPIC_API_KEY is missing.
345
+ if ((process.env.ANTHROPIC_API_KEY || process.env.OPENROUTER_API_KEY) && !process.env.JFL_AGENT_USE_SPAWN) {
346
+ console.log(chalk.cyan(" PP: Using API runtime (zero memory churn)"));
347
+ try {
348
+ const { runAgentViaAPI } = await import("../lib/agent-runtime-api.js");
349
+ await runAgentViaAPI({ task: fullTask, cwd: projectRoot, timeoutMs: 300_000 });
350
+ return;
351
+ }
352
+ catch (err) {
353
+ console.error(chalk.yellow(` PP: API runtime failed: ${err.message} — falling back to spawn`));
354
+ }
355
+ }
356
+ // Legacy spawn fallback — each call creates a ~300MB process.
357
+ // macOS compresses dead pages, unreclaimable without reboot.
358
+ // Set ANTHROPIC_API_KEY to use API runtime instead.
359
+ const forceSpawnPi = !!process.env.JFL_AGENT_USE_PI;
360
+ if (forceSpawnPi) {
361
+ console.log(chalk.cyan(" PP: Using pi agent (heavy — JFL_AGENT_USE_PI=1)"));
362
+ await new Promise((resolve) => {
363
+ const env = { ...process.env };
364
+ delete env.CLAUDECODE;
365
+ delete env.CLAUDE_CODE;
366
+ env.JFL_AGENT_MODE = "1";
367
+ env.JFL_PP_SPAWNED = "1";
368
+ const child = spawn("pi", [
369
+ "--print", fullTask,
370
+ "--no-session",
371
+ "--no-skills",
372
+ "--no-prompt-templates",
373
+ "--no-themes",
374
+ ], {
375
+ cwd: projectRoot,
376
+ stdio: "inherit",
377
+ env,
378
+ });
379
+ child.on("error", (error) => {
380
+ console.error(chalk.red(` PP: Failed to start pi: ${error.message}`));
381
+ resolve();
382
+ });
383
+ child.on("exit", () => {
384
+ resolve();
385
+ });
368
386
  });
369
- child.on("exit", () => {
370
- resolve();
387
+ }
388
+ else {
389
+ console.log(chalk.yellow(" PP: Using claude CLI spawn (⚠ memory churn — set ANTHROPIC_API_KEY for API runtime)"));
390
+ await new Promise((resolve) => {
391
+ const env = { ...process.env };
392
+ delete env.CLAUDECODE;
393
+ delete env.CLAUDE_CODE;
394
+ const child = spawn("claude", [
395
+ "--dangerously-skip-permissions",
396
+ "-p", fullTask,
397
+ "--output-format", "text",
398
+ ], {
399
+ cwd: projectRoot,
400
+ stdio: "inherit",
401
+ env,
402
+ });
403
+ child.on("error", (error) => {
404
+ console.error(chalk.red(` PP: Failed to start claude: ${error.message}`));
405
+ resolve();
406
+ });
407
+ child.on("exit", () => {
408
+ resolve();
409
+ });
371
410
  });
372
- });
411
+ }
373
412
  }
374
413
  /**
375
414
  * Run an agent task using ralph-tui (legacy fallback).
@@ -1729,6 +1768,31 @@ async function agentRun(projectRoot, agentName, roundsOverride) {
1729
1768
  console.log();
1730
1769
  return;
1731
1770
  }
1771
+ // Pre-flight guards — verify environment before starting session
1772
+ const { runGuards } = await import("../lib/agent-guards.js");
1773
+ const guardSummary = await runGuards(projectRoot, agentName, config);
1774
+ // Log guard results
1775
+ for (const result of guardSummary.results) {
1776
+ if (result.passed) {
1777
+ console.log(chalk.green(` ✓ Guard: ${result.name}`));
1778
+ }
1779
+ else if (result.critical) {
1780
+ console.log(chalk.red(` ✗ Guard: ${result.name} — ${result.reason}`));
1781
+ }
1782
+ else {
1783
+ console.log(chalk.yellow(` ⚠ Guard: ${result.name} — ${result.reason}`));
1784
+ }
1785
+ }
1786
+ if (!guardSummary.proceed) {
1787
+ console.log(chalk.red(`\n Blocked by ${guardSummary.blockers.length} critical guard(s). Fix the issues above and retry.\n`));
1788
+ return;
1789
+ }
1790
+ if (guardSummary.warnings.length > 0) {
1791
+ console.log(chalk.yellow(`\n ${guardSummary.warnings.length} warning(s) — proceeding anyway.\n`));
1792
+ }
1793
+ else {
1794
+ console.log();
1795
+ }
1732
1796
  // Use config.rounds as default (Karpathy: ~50 experiments per session)
1733
1797
  // Allow override via CLI for debugging/testing
1734
1798
  const rounds = roundsOverride ?? config.rounds ?? 50;
@@ -1905,6 +1969,19 @@ Based on the failing queries/tests and the actual code, suggest ONE concrete cha
1905
1969
  }
1906
1970
  // End session
1907
1971
  const summary = await endSession(session, transitions);
1972
+ // Post eval:scored to hub event bus — triggers auto-merge, flag-regression, training flows
1973
+ await postHubEvent(projectRoot, "eval:scored", {
1974
+ agent: agentName,
1975
+ metric: config.metric,
1976
+ baseline: (summary.baseline).toFixed(4),
1977
+ composite: (summary.finalMetric).toFixed(4),
1978
+ delta: summary.totalDelta.toFixed(4),
1979
+ improved: String(summary.improvedRounds > 0),
1980
+ rounds: summary.rounds,
1981
+ kept: summary.improvedRounds,
1982
+ branch: session.branch,
1983
+ pr_number: summary.prUrl ? summary.prUrl.split("/").pop() : "",
1984
+ });
1908
1985
  console.log(chalk.bold(`\n ── Session Complete ${"─".repeat(35)}\n`));
1909
1986
  console.log(chalk.gray(` Rounds: ${summary.rounds}`));
1910
1987
  console.log(chalk.gray(` Improved: ${summary.improvedRounds}`));