jfl 0.8.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -4
- package/dist/commands/digest.d.ts +6 -0
- package/dist/commands/digest.d.ts.map +1 -1
- package/dist/commands/digest.js +70 -69
- package/dist/commands/digest.js.map +1 -1
- package/dist/commands/doctor.d.ts +1 -0
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +30 -1
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/eval.d.ts +40 -0
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +8 -8
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/findings.d.ts +7 -0
- package/dist/commands/findings.d.ts.map +1 -1
- package/dist/commands/findings.js +4 -4
- package/dist/commands/findings.js.map +1 -1
- package/dist/commands/ide.d.ts +2 -1
- package/dist/commands/ide.d.ts.map +1 -1
- package/dist/commands/ide.js +61 -1
- package/dist/commands/ide.js.map +1 -1
- package/dist/commands/init-from-service.d.ts +15 -0
- package/dist/commands/init-from-service.d.ts.map +1 -0
- package/dist/commands/init-from-service.js +541 -0
- package/dist/commands/init-from-service.js.map +1 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +32 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/kanban.d.ts.map +1 -1
- package/dist/commands/kanban.js +13 -4
- package/dist/commands/kanban.js.map +1 -1
- package/dist/commands/linear.d.ts +41 -0
- package/dist/commands/linear.d.ts.map +1 -0
- package/dist/commands/linear.js +715 -0
- package/dist/commands/linear.js.map +1 -0
- package/dist/commands/peter.d.ts.map +1 -1
- package/dist/commands/peter.js +232 -25
- package/dist/commands/peter.js.map +1 -1
- package/dist/commands/portfolio.d.ts +5 -0
- package/dist/commands/portfolio.d.ts.map +1 -1
- package/dist/commands/portfolio.js +193 -203
- package/dist/commands/portfolio.js.map +1 -1
- package/dist/commands/predict.d.ts +19 -0
- package/dist/commands/predict.d.ts.map +1 -1
- package/dist/commands/predict.js +4 -4
- package/dist/commands/predict.js.map +1 -1
- package/dist/commands/services.d.ts.map +1 -1
- package/dist/commands/services.js +146 -0
- package/dist/commands/services.js.map +1 -1
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +279 -20
- package/dist/commands/setup.js.map +1 -1
- package/dist/commands/start.d.ts +25 -0
- package/dist/commands/start.d.ts.map +1 -0
- package/dist/commands/start.js +191 -0
- package/dist/commands/start.js.map +1 -0
- package/dist/commands/telemetry-monitor.d.ts +11 -0
- package/dist/commands/telemetry-monitor.d.ts.map +1 -0
- package/dist/commands/telemetry-monitor.js +224 -0
- package/dist/commands/telemetry-monitor.js.map +1 -0
- package/dist/commands/telemetry-test.d.ts +11 -0
- package/dist/commands/telemetry-test.d.ts.map +1 -0
- package/dist/commands/telemetry-test.js +67 -0
- package/dist/commands/telemetry-test.js.map +1 -0
- package/dist/commands/tenet-agents.d.ts +13 -0
- package/dist/commands/tenet-agents.d.ts.map +1 -0
- package/dist/commands/tenet-agents.js +191 -0
- package/dist/commands/tenet-agents.js.map +1 -0
- package/dist/commands/tenet-setup.d.ts +20 -0
- package/dist/commands/tenet-setup.d.ts.map +1 -0
- package/dist/commands/tenet-setup.js +135 -0
- package/dist/commands/tenet-setup.js.map +1 -0
- package/dist/commands/train.d.ts +18 -0
- package/dist/commands/train.d.ts.map +1 -1
- package/dist/commands/train.js +182 -0
- package/dist/commands/train.js.map +1 -1
- package/dist/commands/viz.d.ts +33 -0
- package/dist/commands/viz.d.ts.map +1 -1
- package/dist/commands/viz.js +9 -9
- package/dist/commands/viz.js.map +1 -1
- package/dist/commands/whoami.d.ts +2 -0
- package/dist/commands/whoami.d.ts.map +1 -0
- package/dist/commands/whoami.js +24 -0
- package/dist/commands/whoami.js.map +1 -0
- package/dist/index.js +230 -30
- package/dist/index.js.map +1 -1
- package/dist/lib/advanced-setup.d.ts +78 -0
- package/dist/lib/advanced-setup.d.ts.map +1 -0
- package/dist/lib/advanced-setup.js +433 -0
- package/dist/lib/advanced-setup.js.map +1 -0
- package/dist/lib/agent-config.d.ts +33 -0
- package/dist/lib/agent-config.d.ts.map +1 -1
- package/dist/lib/agent-config.js +26 -0
- package/dist/lib/agent-config.js.map +1 -1
- package/dist/lib/counterfactual-training-bridge.d.ts +114 -0
- package/dist/lib/counterfactual-training-bridge.d.ts.map +1 -0
- package/dist/lib/counterfactual-training-bridge.js +322 -0
- package/dist/lib/counterfactual-training-bridge.js.map +1 -0
- package/dist/lib/discovery-agent.d.ts +48 -0
- package/dist/lib/discovery-agent.d.ts.map +1 -0
- package/dist/lib/discovery-agent.js +111 -0
- package/dist/lib/discovery-agent.js.map +1 -0
- package/dist/lib/flow-engine.d.ts.map +1 -1
- package/dist/lib/flow-engine.js +46 -8
- package/dist/lib/flow-engine.js.map +1 -1
- package/dist/lib/gtm-generator.d.ts +29 -0
- package/dist/lib/gtm-generator.d.ts.map +1 -0
- package/dist/lib/gtm-generator.js +252 -0
- package/dist/lib/gtm-generator.js.map +1 -0
- package/dist/lib/hub-health.d.ts +40 -0
- package/dist/lib/hub-health.d.ts.map +1 -0
- package/dist/lib/hub-health.js +89 -0
- package/dist/lib/hub-health.js.map +1 -0
- package/dist/lib/invariant-monitor.d.ts +6 -2
- package/dist/lib/invariant-monitor.d.ts.map +1 -1
- package/dist/lib/invariant-monitor.js +89 -2
- package/dist/lib/invariant-monitor.js.map +1 -1
- package/dist/lib/journal-analyzer.d.ts +71 -0
- package/dist/lib/journal-analyzer.d.ts.map +1 -0
- package/dist/lib/journal-analyzer.js +306 -0
- package/dist/lib/journal-analyzer.js.map +1 -0
- package/dist/lib/linear-client.d.ts +73 -0
- package/dist/lib/linear-client.d.ts.map +1 -0
- package/dist/lib/linear-client.js +112 -0
- package/dist/lib/linear-client.js.map +1 -0
- package/dist/lib/linear-id-map.d.ts +20 -0
- package/dist/lib/linear-id-map.d.ts.map +1 -0
- package/dist/lib/linear-id-map.js +59 -0
- package/dist/lib/linear-id-map.js.map +1 -0
- package/dist/lib/linear-kanban.d.ts +66 -0
- package/dist/lib/linear-kanban.d.ts.map +1 -0
- package/dist/lib/linear-kanban.js +175 -0
- package/dist/lib/linear-kanban.js.map +1 -0
- package/dist/lib/onboarding.d.ts +40 -0
- package/dist/lib/onboarding.d.ts.map +1 -0
- package/dist/lib/onboarding.js +213 -0
- package/dist/lib/onboarding.js.map +1 -0
- package/dist/lib/physical-world-model.d.ts +50 -0
- package/dist/lib/physical-world-model.d.ts.map +1 -0
- package/dist/lib/physical-world-model.js +251 -0
- package/dist/lib/physical-world-model.js.map +1 -0
- package/dist/lib/planning-loop.d.ts +157 -0
- package/dist/lib/planning-loop.d.ts.map +1 -0
- package/dist/lib/planning-loop.js +537 -0
- package/dist/lib/planning-loop.js.map +1 -0
- package/dist/lib/policy-head.d.ts +13 -0
- package/dist/lib/policy-head.d.ts.map +1 -1
- package/dist/lib/policy-head.js +168 -2
- package/dist/lib/policy-head.js.map +1 -1
- package/dist/lib/resource-optimizer-middleware.d.ts +39 -0
- package/dist/lib/resource-optimizer-middleware.d.ts.map +1 -0
- package/dist/lib/resource-optimizer-middleware.js +222 -0
- package/dist/lib/resource-optimizer-middleware.js.map +1 -0
- package/dist/lib/resource-optimizer.d.ts +71 -0
- package/dist/lib/resource-optimizer.d.ts.map +1 -0
- package/dist/lib/resource-optimizer.js +228 -0
- package/dist/lib/resource-optimizer.js.map +1 -0
- package/dist/lib/rl-manager.d.ts +74 -0
- package/dist/lib/rl-manager.d.ts.map +1 -0
- package/dist/lib/rl-manager.js +245 -0
- package/dist/lib/rl-manager.js.map +1 -0
- package/dist/lib/service-analyzer.d.ts +76 -0
- package/dist/lib/service-analyzer.d.ts.map +1 -0
- package/dist/lib/service-analyzer.js +704 -0
- package/dist/lib/service-analyzer.js.map +1 -0
- package/dist/lib/service-gtm.js +2 -2
- package/dist/lib/service-gtm.js.map +1 -1
- package/dist/lib/service-questionnaire.d.ts +11 -0
- package/dist/lib/service-questionnaire.d.ts.map +1 -0
- package/dist/lib/service-questionnaire.js +89 -0
- package/dist/lib/service-questionnaire.js.map +1 -0
- package/dist/lib/setup/agent-generator.d.ts +2 -0
- package/dist/lib/setup/agent-generator.d.ts.map +1 -1
- package/dist/lib/setup/agent-generator.js +128 -4
- package/dist/lib/setup/agent-generator.js.map +1 -1
- package/dist/lib/setup/flow-generator.d.ts +10 -0
- package/dist/lib/setup/flow-generator.d.ts.map +1 -0
- package/dist/lib/setup/flow-generator.js +113 -0
- package/dist/lib/setup/flow-generator.js.map +1 -0
- package/dist/lib/setup/invariant-bridge.d.ts +91 -0
- package/dist/lib/setup/invariant-bridge.d.ts.map +1 -0
- package/dist/lib/setup/invariant-bridge.js +384 -0
- package/dist/lib/setup/invariant-bridge.js.map +1 -0
- package/dist/lib/setup/spec-generator.d.ts +41 -5
- package/dist/lib/setup/spec-generator.d.ts.map +1 -1
- package/dist/lib/setup/spec-generator.js +503 -29
- package/dist/lib/setup/spec-generator.js.map +1 -1
- package/dist/lib/setup/starter-intelligence.d.ts +25 -0
- package/dist/lib/setup/starter-intelligence.d.ts.map +1 -0
- package/dist/lib/setup/starter-intelligence.js +309 -0
- package/dist/lib/setup/starter-intelligence.js.map +1 -0
- package/dist/lib/stratus-client.js +1 -1
- package/dist/lib/stratus-client.js.map +1 -1
- package/dist/lib/surface-agent.d.ts +78 -0
- package/dist/lib/surface-agent.d.ts.map +1 -0
- package/dist/lib/surface-agent.js +105 -0
- package/dist/lib/surface-agent.js.map +1 -0
- package/dist/lib/surface-coordination-example.d.ts +30 -0
- package/dist/lib/surface-coordination-example.d.ts.map +1 -0
- package/dist/lib/surface-coordination-example.js +164 -0
- package/dist/lib/surface-coordination-example.js.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts +15 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.js +177 -0
- package/dist/lib/telemetry/physical-world-collector.js.map +1 -0
- package/dist/lib/telemetry/training-bridge.d.ts +51 -0
- package/dist/lib/telemetry/training-bridge.d.ts.map +1 -0
- package/dist/lib/telemetry/training-bridge.js +185 -0
- package/dist/lib/telemetry/training-bridge.js.map +1 -0
- package/dist/lib/telemetry.d.ts +2 -1
- package/dist/lib/telemetry.d.ts.map +1 -1
- package/dist/lib/telemetry.js +23 -2
- package/dist/lib/telemetry.js.map +1 -1
- package/dist/lib/tenet-board-agent.d.ts +52 -0
- package/dist/lib/tenet-board-agent.d.ts.map +1 -0
- package/dist/lib/tenet-board-agent.js +226 -0
- package/dist/lib/tenet-board-agent.js.map +1 -0
- package/dist/lib/tenet-ide-agent.d.ts +40 -0
- package/dist/lib/tenet-ide-agent.d.ts.map +1 -0
- package/dist/lib/tenet-ide-agent.js +199 -0
- package/dist/lib/tenet-ide-agent.js.map +1 -0
- package/dist/lib/workspace/data-pipeline.d.ts.map +1 -1
- package/dist/lib/workspace/data-pipeline.js +27 -5
- package/dist/lib/workspace/data-pipeline.js.map +1 -1
- package/dist/lib/workspace/sidebar-runner.d.ts +13 -0
- package/dist/lib/workspace/sidebar-runner.d.ts.map +1 -0
- package/dist/lib/workspace/sidebar-runner.js +419 -0
- package/dist/lib/workspace/sidebar-runner.js.map +1 -0
- package/dist/lib/workspace/surface-registry.d.ts.map +1 -1
- package/dist/lib/workspace/surface-registry.js +9 -1
- package/dist/lib/workspace/surface-registry.js.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.d.ts +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.js +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.js.map +1 -1
- package/dist/lib/workspace/surfaces/index.d.ts +3 -0
- package/dist/lib/workspace/surfaces/index.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/index.js +3 -0
- package/dist/lib/workspace/surfaces/index.js.map +1 -1
- package/dist/lib/workspace/surfaces/kanban.d.ts +15 -0
- package/dist/lib/workspace/surfaces/kanban.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/kanban.js +43 -0
- package/dist/lib/workspace/surfaces/kanban.js.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts +15 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.js +37 -0
- package/dist/lib/workspace/surfaces/physical-world.js.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts +22 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.js +94 -0
- package/dist/lib/workspace/surfaces/sidebar.js.map +1 -0
- package/dist/lib/workspace/tmux-adapter.d.ts +8 -5
- package/dist/lib/workspace/tmux-adapter.d.ts.map +1 -1
- package/dist/lib/workspace/tmux-adapter.js +38 -7
- package/dist/lib/workspace/tmux-adapter.js.map +1 -1
- package/dist/lib/workspace/tmux-sidebar.d.ts +14 -0
- package/dist/lib/workspace/tmux-sidebar.d.ts.map +1 -0
- package/dist/lib/workspace/tmux-sidebar.js +230 -0
- package/dist/lib/workspace/tmux-sidebar.js.map +1 -0
- package/dist/types/flows.d.ts +2 -1
- package/dist/types/flows.d.ts.map +1 -1
- package/dist/types/physical-world-model.d.ts +65 -0
- package/dist/types/physical-world-model.d.ts.map +1 -0
- package/dist/types/physical-world-model.js +43 -0
- package/dist/types/physical-world-model.js.map +1 -0
- package/dist/types/telemetry.d.ts +37 -0
- package/dist/types/telemetry.d.ts.map +1 -1
- package/dist/types/world-model.d.ts.map +1 -1
- package/dist/types/world-model.js +14 -7
- package/dist/types/world-model.js.map +1 -1
- package/dist/utils/context-hub-port.d.ts.map +1 -1
- package/dist/utils/context-hub-port.js +6 -1
- package/dist/utils/context-hub-port.js.map +1 -1
- package/dist/utils/jfl-config.d.ts +7 -2
- package/dist/utils/jfl-config.d.ts.map +1 -1
- package/dist/utils/jfl-config.js +14 -4
- package/dist/utils/jfl-config.js.map +1 -1
- package/package.json +3 -2
- package/packages/pi/extensions/context.ts +51 -1
- package/packages/pi/extensions/hub-tools.ts +247 -0
- package/packages/pi/extensions/index.ts +38 -6
- package/packages/pi/extensions/memory-tool.ts +84 -4
- package/packages/pi/extensions/service-skills.ts +214 -0
- package/scripts/telemetry-dashboard.sh +44 -0
- package/scripts/test-planning-loop-e2e.ts +181 -0
- package/scripts/test-server-inference.ts +49 -0
- package/scripts/test-state-sensitivity.ts +32 -0
- package/scripts/train/v2/benchmark.py +661 -0
- package/scripts/train/v2/generate_balanced.py +439 -0
- package/scripts/train/v2/generate_hard_negatives.py +219 -0
- package/scripts/train/v2/infer.py +149 -36
- package/scripts/train/v2/infer_server.py +224 -0
- package/scripts/train/v2/online_train.py +576 -0
- package/scripts/train/v2/precompute.py +24 -6
- package/template/CLAUDE.md +74 -132
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end test for the Planning Loop
|
|
3
|
+
*
|
|
4
|
+
* Exercises: PolicyHead v2 → DynamicsModel rollouts → InvariantMonitor → Action Selection
|
|
5
|
+
*
|
|
6
|
+
* Run: npx tsx scripts/test-planning-loop-e2e.ts
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createPlanningLoop } from "../src/lib/planning-loop.js"
|
|
10
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
11
|
+
import type { PlanningResult, EvaluatedAction } from "../src/lib/planning-loop.js"
|
|
12
|
+
|
|
13
|
+
const projectRoot = process.cwd()
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Test scenarios — represent real situations Peter Parker faces
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/** One end-to-end planning scenario fed to the planning loop. */
interface Scenario {
  /** Human-readable label used in log output and the summary table. */
  name: string
  /** Agent identifier passed as the first argument to `planner.plan`. */
  agentId: string
  /** Natural-language goal passed as the second argument to `planner.plan`. */
  goal: string
  /** Action types that count as a correct selection for this scenario. */
  expectedTypes: string[]
}

// Typed explicitly so that a misspelled or missing field in any entry is
// reported at the literal instead of surfacing later at a use site.
const SCENARIOS: Scenario[] = [
  {
    name: "Bug reported — tests failing",
    agentId: "error-fixer",
    goal: "Tests are failing in planning-loop.test.ts — TypeError on undefined property. Fix the failing test.",
    expectedTypes: ["fix"],
  },
  {
    name: "Feature request — add new capability",
    agentId: "feature-builder",
    goal: "Add multi-step rollout support to the planning loop for deeper lookahead.",
    expectedTypes: ["feature"],
  },
  {
    name: "Performance optimization",
    agentId: "optimizer",
    goal: "Reduce PolicyHead inference latency from 2s to under 500ms for interactive use.",
    expectedTypes: ["experiment", "refactor"],
  },
  {
    name: "Test coverage gap",
    agentId: "test-coverage",
    goal: "Add unit tests for the counterfactual training bridge — currently 0% coverage.",
    expectedTypes: ["test"],
  },
  {
    name: "Config change needed",
    agentId: "config-updater",
    goal: "Update the nightly pipeline schedule to run at 2am MST instead of midnight.",
    expectedTypes: ["config"],
  },
]
|
|
51
|
+
|
|
52
|
+
// ============================================================================
|
|
53
|
+
// Helpers
|
|
54
|
+
// ============================================================================
|
|
55
|
+
|
|
56
|
+
/**
 * Render one evaluated candidate action as a single summary line.
 *
 * The line contains the action type (padded to 12 characters), the PolicyHead
 * confidence and combined score (3 decimals each), the predicted eval-score
 * change (signed, 4 decimals, or "N/A" when absent), the prediction source,
 * and — only when applicable — the invariant-violation count and filter reason.
 *
 * @param ea - Evaluated action as returned in a planning result.
 * @returns The formatted line, without a trailing newline.
 */
function formatAction(ea: EvaluatedAction): string {
  const pred = ea.prediction
  const delta = pred.outcome?.immediate?.evalScoreChange
  // `!= null` rejects both undefined and null: a null score change would pass
  // a `!== undefined` check and then throw on `.toFixed`.
  const deltaStr = delta != null ? `Δ=${delta > 0 ? "+" : ""}${delta.toFixed(4)}` : "Δ=N/A"
  const violations = ea.invariantViolations.length > 0 ? ` ⚠️${ea.invariantViolations.length} violations` : ""
  const filtered = ea.filtered ? ` [FILTERED: ${ea.filterReason}]` : ""

  return ` ${ea.action.actionType.padEnd(12)} conf=${ea.phConfidence.toFixed(3)} score=${ea.combinedScore.toFixed(3)} ${deltaStr} src=${pred.source}${violations}${filtered}`
}
|
|
65
|
+
|
|
66
|
+
/**
 * Print a report for one scenario: header, timing, every evaluated action,
 * and whether the selected action's type is among the expected ones.
 *
 * @param scenario - The scenario that was planned.
 * @param result - The planning result returned for that scenario.
 */
function logResult(scenario: typeof SCENARIOS[0], result: PlanningResult): void {
  // Only mark the goal with an ellipsis when it was actually cut; previously
  // "..." was appended unconditionally, even to goals that fit in full.
  const maxGoalChars = 70
  const goalPreview =
    scenario.goal.length > maxGoalChars ? `${scenario.goal.slice(0, maxGoalChars)}...` : scenario.goal

  console.log(`\n${"═".repeat(70)}`)
  console.log(` Scenario: ${scenario.name}`)
  console.log(` Agent: ${scenario.agentId}`)
  console.log(` Goal: ${goalPreview}`)
  console.log(`${"─".repeat(70)}`)
  console.log(` Time: ${result.planningTimeMs}ms`)
  console.log(` Rollouts: ${result.rolloutsPerformed}`)
  console.log(` Complete: ${result.completed}`)
  console.log()

  // All actions
  console.log(` Actions evaluated (${result.allActions.length}):`)
  for (const ea of result.allActions) {
    // Identity comparison: the selected action is flagged only if it is the
    // same object as the list entry.
    const selected = result.selectedAction === ea ? " ← SELECTED" : ""
    console.log(`${formatAction(ea)}${selected}`)
  }

  // Selected action
  if (result.selectedAction) {
    const sel = result.selectedAction
    const correctType = scenario.expectedTypes.includes(sel.action.actionType)
    const check = correctType ? "✅" : "❌"
    console.log(`\n ${check} Selected: ${sel.action.actionType} (expected: ${scenario.expectedTypes.join("|")})`)
  } else {
    console.log(`\n ❌ No action selected: ${result.noSelectionReason}`)
  }
}
|
|
94
|
+
|
|
95
|
+
// ============================================================================
|
|
96
|
+
// Main
|
|
97
|
+
// ============================================================================
|
|
98
|
+
|
|
99
|
+
/**
 * Run every entry of SCENARIOS through the planning loop and print a summary.
 *
 * Exits the process with code 1 when the PolicyHead reports it is not loaded,
 * or when at least one scenario selects an unexpected action type, selects no
 * action, or throws. Exits with code 0 when every scenario passes.
 */
async function main(): Promise<void> {
  console.log("╔══════════════════════════════════════════════════════════════════════╗")
  console.log("║ Planning Loop — End-to-End Test ║")
  console.log("║ PH (v2 transformer) → DM (rollouts) → IM (invariants) → Select ║")
  console.log("╚══════════════════════════════════════════════════════════════════════╝")

  // Check prerequisites
  const ph = new PolicyHeadInference(projectRoot)
  console.log(`\n PolicyHead loaded: ${ph.isLoaded} (version: ${ph.version})`)
  if (ph.stats) {
    console.log(` Trained on: ${ph.stats.trained_on} examples`)
    const valAcc = ph.stats.val_accuracy ?? ph.stats.direction_accuracy
    if (typeof valAcc === "number") {
      // Values above 1 are printed as-is (treated as percentages); values up
      // to 1 are scaled by 100 (treated as fractions).
      console.log(` Val accuracy: ${(valAcc > 1 ? valAcc : valAcc * 100).toFixed(1)}%`)
    } else {
      // Neither accuracy field is present; avoid printing "NaN%".
      console.log(" Val accuracy: N/A")
    }
  }

  if (!ph.isLoaded) {
    console.error("\n ❌ PolicyHead not loaded — cannot run e2e test")
    console.error(" Copy checkpoint to .jfl/checkpoints/best_policy_head.pt")
    process.exit(1)
  }

  // Create planning loop
  const planner = createPlanningLoop(projectRoot, {
    topK: 5,
    verbose: true,
    maxPlanningTimeMs: 30000, // 30s for test
    checkInvariants: true,
    recordTransitions: false, // Don't pollute real data during test
    writeTrainingTuples: false,
  })

  console.log(` Planning loop ready: ${planner.isReady}`)

  // Run each scenario
  let passed = 0
  let failed = 0
  // `result` is null for scenarios that threw before producing a result.
  const results: Array<{ scenario: string; result: PlanningResult | null; correct: boolean }> = []

  for (const scenario of SCENARIOS) {
    try {
      console.log(`\n Running: ${scenario.name}...`)
      const result = await planner.plan(scenario.agentId, scenario.goal)
      logResult(scenario, result)

      const correct = result.selectedAction
        ? scenario.expectedTypes.includes(result.selectedAction.action.actionType)
        : false

      if (correct) passed++
      else failed++

      results.push({ scenario: scenario.name, result, correct })
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances carry `.message`.
      const message = err instanceof Error ? err.message : String(err)
      console.error(`\n ❌ ${scenario.name} THREW: ${message}`)
      failed++
      results.push({
        scenario: scenario.name,
        result: null,
        correct: false,
      })
    }
  }

  // Summary
  console.log(`\n${"═".repeat(70)}`)
  console.log(` SUMMARY`)
  console.log(`${"─".repeat(70)}`)
  for (const r of results) {
    const check = r.correct ? "✅" : "❌"
    const time = r.result ? `${r.result.planningTimeMs}ms` : "ERRORED"
    const selected = r.result?.selectedAction?.action.actionType ?? "none"
    console.log(` ${check} ${r.scenario.padEnd(35)} → ${selected.padEnd(12)} (${time})`)
  }
  console.log(`\n Passed: ${passed}/${SCENARIOS.length} Failed: ${failed}/${SCENARIOS.length}`)
  console.log(`${"═".repeat(70)}`)

  process.exit(failed > 0 ? 1 : 0)
}

// Entry point: any rejection escaping main() is reported and fails the run.
main().catch(err => {
  console.error("Fatal error:", err)
  process.exit(1)
})
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env -S npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Test the inference server path in PolicyHead
|
|
4
|
+
*/
|
|
5
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
6
|
+
|
|
7
|
+
/**
 * Time PolicyHead action selection for a fixed state across several goals.
 *
 * Exits with code 1 when the PolicyHead is not loaded or is not version 2.
 * For each goal, prints the elapsed wall-clock time, the selected action, its
 * confidence as a percentage, and the first 60 characters of the goal.
 */
async function main(): Promise<void> {
  const ph = new PolicyHeadInference(".")
  console.log("PH loaded:", ph.isLoaded, "version:", ph.version)

  if (!ph.isLoaded || ph.version !== 2) {
    console.log("v2 not loaded, aborting")
    process.exit(1)
  }

  const state = {
    composite_score: 0.72,
    dimension_scores: { test_pass_rate: 0.85, build_health: 0.9, code_quality: 0.8 },
    tests_passing: 17,
    tests_total: 20,
    trajectory_length: 3,
    recent_deltas: [-0.03, 0.01],
    agent: "error-fixer",
  }

  const goals = [
    "Fix failing tests in planning-loop.test.ts — TypeError on undefined property",
    "Add multi-step rollout support to the planning loop",
    "Add unit tests for the counterfactual training bridge",
    "Update the nightly pipeline schedule to run at 2am MST",
    "Reduce PolicyHead inference latency from 20s to under 5s",
  ]

  try {
    for (const goal of goals) {
      const t0 = Date.now()
      const result = await ph.selectAction(state, goal)
      const elapsed = Date.now() - t0
      console.log(`[${elapsed}ms] ${result.action} (${(result.confidence * 100).toFixed(1)}%) ← ${goal.slice(0, 60)}`)
    }
  } finally {
    // Run the same cleanup on failure as on success: previously a throwing
    // selectAction skipped stopServer() entirely.
    // NOTE(review): assumes stopServer() is safe to call even if no server
    // was started — confirm against PolicyHeadInference.
    console.log("\nStopping server...")
    ph.stopServer()
  }
  console.log("Done")
}

// Entry point: report any rejection from main() and exit non-zero.
main().catch(err => {
  console.error(err)
  process.exit(1)
})
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/env -S npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Test how state values affect PolicyHead predictions
|
|
4
|
+
*/
|
|
5
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
6
|
+
|
|
7
|
+
/**
 * Show how different state values change PolicyHead predictions for one goal.
 *
 * Exits with code 1 when the PolicyHead is not loaded or is not version 2.
 * For each labelled state, prints the elapsed time, the selected action with
 * its confidence, and any alternatives with their confidences.
 */
async function main(): Promise<void> {
  const ph = new PolicyHeadInference(".")
  if (!ph.isLoaded || ph.version !== 2) {
    // Say why the script is exiting instead of failing silently.
    console.error("v2 not loaded, aborting")
    process.exit(1)
  }

  const goal = "Add multi-step rollout support to the planning loop"

  const states = [
    { label: "healthy (all high)", composite_score: 0.95, dimension_scores: { test_pass_rate: 1.0, build_health: 1.0, code_quality: 0.95 }, tests_passing: 25, tests_total: 25, trajectory_length: 5, recent_deltas: [0.02, 0.01], agent: "feature-builder" },
    { label: "degraded tests", composite_score: 0.72, dimension_scores: { test_pass_rate: 0.85, build_health: 0.9, code_quality: 0.8 }, tests_passing: 17, tests_total: 20, trajectory_length: 3, recent_deltas: [-0.03], agent: "error-fixer" },
    { label: "low coverage", composite_score: 0.55, dimension_scores: { test_pass_rate: 1.0, build_health: 0.9, code_quality: 0.5 }, tests_passing: 15, tests_total: 15, trajectory_length: 2, recent_deltas: [0.01], agent: "test-coverage" },
    { label: "fresh start", composite_score: 0.3, dimension_scores: { test_pass_rate: 0.5, build_health: 0.5, code_quality: 0.5 }, tests_passing: 5, tests_total: 10, trajectory_length: 0, recent_deltas: [], agent: "onboarding" },
    { label: "near perfect", composite_score: 0.98, dimension_scores: { test_pass_rate: 1.0, build_health: 1.0, code_quality: 1.0, hub_health: 1.0 }, tests_passing: 30, tests_total: 30, trajectory_length: 10, recent_deltas: [0.005, 0.003], agent: "optimizer" },
  ]

  try {
    for (const s of states) {
      // `label` is only for display; the remaining fields form the state.
      const { label, ...state } = s
      const t0 = Date.now()
      // NOTE(review): the `as any` cast is kept from the original; the exact
      // parameter type of selectAction is not visible here — confirm whether
      // it can be replaced by a precise type.
      const result = await ph.selectAction(state as any, goal)
      const elapsed = Date.now() - t0
      const alts = result.alternatives?.map(a => `${a.action}(${(a.confidence*100).toFixed(0)}%)`).join(", ") ?? ""
      console.log(`[${elapsed}ms] ${label}: ${result.action} (${(result.confidence*100).toFixed(1)}%) | alts: ${alts}`)
    }
  } finally {
    // Run the cleanup on failure too: previously a throwing selectAction
    // skipped stopServer() entirely.
    // NOTE(review): assumes stopServer() is safe to call even if no server
    // was started — confirm against PolicyHeadInference.
    ph.stopServer()
  }
}

// Entry point: report any rejection from main() and exit non-zero.
main().catch(err => { console.error(err); process.exit(1) })
|