jfl 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +1 -0
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +30 -1
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/ide.d.ts +2 -1
- package/dist/commands/ide.d.ts.map +1 -1
- package/dist/commands/ide.js +60 -1
- package/dist/commands/ide.js.map +1 -1
- package/dist/commands/init-from-service.d.ts +15 -0
- package/dist/commands/init-from-service.d.ts.map +1 -0
- package/dist/commands/init-from-service.js +541 -0
- package/dist/commands/init-from-service.js.map +1 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +32 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/kanban.d.ts.map +1 -1
- package/dist/commands/kanban.js +13 -4
- package/dist/commands/kanban.js.map +1 -1
- package/dist/commands/linear.d.ts +41 -0
- package/dist/commands/linear.d.ts.map +1 -0
- package/dist/commands/linear.js +715 -0
- package/dist/commands/linear.js.map +1 -0
- package/dist/commands/peter.d.ts.map +1 -1
- package/dist/commands/peter.js +232 -25
- package/dist/commands/peter.js.map +1 -1
- package/dist/commands/services.d.ts.map +1 -1
- package/dist/commands/services.js +146 -0
- package/dist/commands/services.js.map +1 -1
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +173 -13
- package/dist/commands/setup.js.map +1 -1
- package/dist/commands/telemetry-monitor.d.ts +11 -0
- package/dist/commands/telemetry-monitor.d.ts.map +1 -0
- package/dist/commands/telemetry-monitor.js +224 -0
- package/dist/commands/telemetry-monitor.js.map +1 -0
- package/dist/commands/telemetry-test.d.ts +11 -0
- package/dist/commands/telemetry-test.d.ts.map +1 -0
- package/dist/commands/telemetry-test.js +67 -0
- package/dist/commands/telemetry-test.js.map +1 -0
- package/dist/commands/tenet-agents.d.ts +13 -0
- package/dist/commands/tenet-agents.d.ts.map +1 -0
- package/dist/commands/tenet-agents.js +191 -0
- package/dist/commands/tenet-agents.js.map +1 -0
- package/dist/commands/tenet-setup.d.ts +19 -0
- package/dist/commands/tenet-setup.d.ts.map +1 -0
- package/dist/commands/tenet-setup.js +131 -0
- package/dist/commands/tenet-setup.js.map +1 -0
- package/dist/commands/train.d.ts +18 -0
- package/dist/commands/train.d.ts.map +1 -1
- package/dist/commands/train.js +182 -0
- package/dist/commands/train.js.map +1 -1
- package/dist/commands/whoami.d.ts +2 -0
- package/dist/commands/whoami.d.ts.map +1 -0
- package/dist/commands/whoami.js +24 -0
- package/dist/commands/whoami.js.map +1 -0
- package/dist/index.js +159 -10
- package/dist/index.js.map +1 -1
- package/dist/lib/advanced-setup.d.ts +78 -0
- package/dist/lib/advanced-setup.d.ts.map +1 -0
- package/dist/lib/advanced-setup.js +433 -0
- package/dist/lib/advanced-setup.js.map +1 -0
- package/dist/lib/agent-config.d.ts +33 -0
- package/dist/lib/agent-config.d.ts.map +1 -1
- package/dist/lib/agent-config.js +26 -0
- package/dist/lib/agent-config.js.map +1 -1
- package/dist/lib/counterfactual-training-bridge.d.ts +114 -0
- package/dist/lib/counterfactual-training-bridge.d.ts.map +1 -0
- package/dist/lib/counterfactual-training-bridge.js +322 -0
- package/dist/lib/counterfactual-training-bridge.js.map +1 -0
- package/dist/lib/discovery-agent.d.ts +48 -0
- package/dist/lib/discovery-agent.d.ts.map +1 -0
- package/dist/lib/discovery-agent.js +111 -0
- package/dist/lib/discovery-agent.js.map +1 -0
- package/dist/lib/flow-engine.d.ts.map +1 -1
- package/dist/lib/flow-engine.js +46 -8
- package/dist/lib/flow-engine.js.map +1 -1
- package/dist/lib/gtm-generator.d.ts +29 -0
- package/dist/lib/gtm-generator.d.ts.map +1 -0
- package/dist/lib/gtm-generator.js +252 -0
- package/dist/lib/gtm-generator.js.map +1 -0
- package/dist/lib/hub-health.d.ts +40 -0
- package/dist/lib/hub-health.d.ts.map +1 -0
- package/dist/lib/hub-health.js +89 -0
- package/dist/lib/hub-health.js.map +1 -0
- package/dist/lib/invariant-monitor.d.ts +6 -2
- package/dist/lib/invariant-monitor.d.ts.map +1 -1
- package/dist/lib/invariant-monitor.js +89 -2
- package/dist/lib/invariant-monitor.js.map +1 -1
- package/dist/lib/journal-analyzer.d.ts +71 -0
- package/dist/lib/journal-analyzer.d.ts.map +1 -0
- package/dist/lib/journal-analyzer.js +306 -0
- package/dist/lib/journal-analyzer.js.map +1 -0
- package/dist/lib/linear-client.d.ts +73 -0
- package/dist/lib/linear-client.d.ts.map +1 -0
- package/dist/lib/linear-client.js +112 -0
- package/dist/lib/linear-client.js.map +1 -0
- package/dist/lib/linear-id-map.d.ts +20 -0
- package/dist/lib/linear-id-map.d.ts.map +1 -0
- package/dist/lib/linear-id-map.js +57 -0
- package/dist/lib/linear-id-map.js.map +1 -0
- package/dist/lib/linear-kanban.d.ts +66 -0
- package/dist/lib/linear-kanban.d.ts.map +1 -0
- package/dist/lib/linear-kanban.js +175 -0
- package/dist/lib/linear-kanban.js.map +1 -0
- package/dist/lib/onboarding.d.ts +40 -0
- package/dist/lib/onboarding.d.ts.map +1 -0
- package/dist/lib/onboarding.js +213 -0
- package/dist/lib/onboarding.js.map +1 -0
- package/dist/lib/physical-world-model.d.ts +50 -0
- package/dist/lib/physical-world-model.d.ts.map +1 -0
- package/dist/lib/physical-world-model.js +251 -0
- package/dist/lib/physical-world-model.js.map +1 -0
- package/dist/lib/planning-loop.d.ts +157 -0
- package/dist/lib/planning-loop.d.ts.map +1 -0
- package/dist/lib/planning-loop.js +537 -0
- package/dist/lib/planning-loop.js.map +1 -0
- package/dist/lib/policy-head.d.ts +13 -0
- package/dist/lib/policy-head.d.ts.map +1 -1
- package/dist/lib/policy-head.js +168 -2
- package/dist/lib/policy-head.js.map +1 -1
- package/dist/lib/resource-optimizer-middleware.d.ts +39 -0
- package/dist/lib/resource-optimizer-middleware.d.ts.map +1 -0
- package/dist/lib/resource-optimizer-middleware.js +222 -0
- package/dist/lib/resource-optimizer-middleware.js.map +1 -0
- package/dist/lib/resource-optimizer.d.ts +71 -0
- package/dist/lib/resource-optimizer.d.ts.map +1 -0
- package/dist/lib/resource-optimizer.js +228 -0
- package/dist/lib/resource-optimizer.js.map +1 -0
- package/dist/lib/rl-manager.d.ts +74 -0
- package/dist/lib/rl-manager.d.ts.map +1 -0
- package/dist/lib/rl-manager.js +244 -0
- package/dist/lib/rl-manager.js.map +1 -0
- package/dist/lib/service-analyzer.d.ts +76 -0
- package/dist/lib/service-analyzer.d.ts.map +1 -0
- package/dist/lib/service-analyzer.js +704 -0
- package/dist/lib/service-analyzer.js.map +1 -0
- package/dist/lib/service-gtm.js +2 -2
- package/dist/lib/service-gtm.js.map +1 -1
- package/dist/lib/service-questionnaire.d.ts +11 -0
- package/dist/lib/service-questionnaire.d.ts.map +1 -0
- package/dist/lib/service-questionnaire.js +89 -0
- package/dist/lib/service-questionnaire.js.map +1 -0
- package/dist/lib/setup/agent-generator.d.ts +2 -0
- package/dist/lib/setup/agent-generator.d.ts.map +1 -1
- package/dist/lib/setup/agent-generator.js +128 -4
- package/dist/lib/setup/agent-generator.js.map +1 -1
- package/dist/lib/setup/flow-generator.d.ts +10 -0
- package/dist/lib/setup/flow-generator.d.ts.map +1 -0
- package/dist/lib/setup/flow-generator.js +113 -0
- package/dist/lib/setup/flow-generator.js.map +1 -0
- package/dist/lib/setup/invariant-bridge.d.ts +91 -0
- package/dist/lib/setup/invariant-bridge.d.ts.map +1 -0
- package/dist/lib/setup/invariant-bridge.js +384 -0
- package/dist/lib/setup/invariant-bridge.js.map +1 -0
- package/dist/lib/setup/spec-generator.d.ts +41 -5
- package/dist/lib/setup/spec-generator.d.ts.map +1 -1
- package/dist/lib/setup/spec-generator.js +503 -29
- package/dist/lib/setup/spec-generator.js.map +1 -1
- package/dist/lib/stratus-client.js +1 -1
- package/dist/lib/stratus-client.js.map +1 -1
- package/dist/lib/surface-agent.d.ts +78 -0
- package/dist/lib/surface-agent.d.ts.map +1 -0
- package/dist/lib/surface-agent.js +105 -0
- package/dist/lib/surface-agent.js.map +1 -0
- package/dist/lib/surface-coordination-example.d.ts +30 -0
- package/dist/lib/surface-coordination-example.d.ts.map +1 -0
- package/dist/lib/surface-coordination-example.js +164 -0
- package/dist/lib/surface-coordination-example.js.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts +15 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.js +177 -0
- package/dist/lib/telemetry/physical-world-collector.js.map +1 -0
- package/dist/lib/telemetry/training-bridge.d.ts +51 -0
- package/dist/lib/telemetry/training-bridge.d.ts.map +1 -0
- package/dist/lib/telemetry/training-bridge.js +185 -0
- package/dist/lib/telemetry/training-bridge.js.map +1 -0
- package/dist/lib/telemetry.d.ts +2 -1
- package/dist/lib/telemetry.d.ts.map +1 -1
- package/dist/lib/telemetry.js +23 -2
- package/dist/lib/telemetry.js.map +1 -1
- package/dist/lib/tenet-board-agent.d.ts +52 -0
- package/dist/lib/tenet-board-agent.d.ts.map +1 -0
- package/dist/lib/tenet-board-agent.js +226 -0
- package/dist/lib/tenet-board-agent.js.map +1 -0
- package/dist/lib/tenet-ide-agent.d.ts +40 -0
- package/dist/lib/tenet-ide-agent.d.ts.map +1 -0
- package/dist/lib/tenet-ide-agent.js +199 -0
- package/dist/lib/tenet-ide-agent.js.map +1 -0
- package/dist/lib/workspace/data-pipeline.d.ts.map +1 -1
- package/dist/lib/workspace/data-pipeline.js +27 -5
- package/dist/lib/workspace/data-pipeline.js.map +1 -1
- package/dist/lib/workspace/sidebar-runner.d.ts +13 -0
- package/dist/lib/workspace/sidebar-runner.d.ts.map +1 -0
- package/dist/lib/workspace/sidebar-runner.js +419 -0
- package/dist/lib/workspace/sidebar-runner.js.map +1 -0
- package/dist/lib/workspace/surface-registry.d.ts.map +1 -1
- package/dist/lib/workspace/surface-registry.js +4 -1
- package/dist/lib/workspace/surface-registry.js.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.d.ts +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.js +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.js.map +1 -1
- package/dist/lib/workspace/surfaces/index.d.ts +3 -0
- package/dist/lib/workspace/surfaces/index.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/index.js +3 -0
- package/dist/lib/workspace/surfaces/index.js.map +1 -1
- package/dist/lib/workspace/surfaces/kanban.d.ts +15 -0
- package/dist/lib/workspace/surfaces/kanban.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/kanban.js +43 -0
- package/dist/lib/workspace/surfaces/kanban.js.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts +15 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.js +37 -0
- package/dist/lib/workspace/surfaces/physical-world.js.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts +22 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.js +90 -0
- package/dist/lib/workspace/surfaces/sidebar.js.map +1 -0
- package/dist/types/flows.d.ts +2 -1
- package/dist/types/flows.d.ts.map +1 -1
- package/dist/types/physical-world-model.d.ts +65 -0
- package/dist/types/physical-world-model.d.ts.map +1 -0
- package/dist/types/physical-world-model.js +43 -0
- package/dist/types/physical-world-model.js.map +1 -0
- package/dist/types/telemetry.d.ts +37 -0
- package/dist/types/telemetry.d.ts.map +1 -1
- package/dist/types/world-model.d.ts.map +1 -1
- package/dist/types/world-model.js +14 -7
- package/dist/types/world-model.js.map +1 -1
- package/dist/utils/context-hub-port.d.ts.map +1 -1
- package/dist/utils/context-hub-port.js +6 -1
- package/dist/utils/context-hub-port.js.map +1 -1
- package/package.json +3 -2
- package/packages/pi/extensions/index.ts +34 -6
- package/scripts/telemetry-dashboard.sh +44 -0
- package/scripts/test-planning-loop-e2e.ts +181 -0
- package/scripts/test-server-inference.ts +49 -0
- package/scripts/test-state-sensitivity.ts +32 -0
- package/scripts/train/v2/benchmark.py +661 -0
- package/scripts/train/v2/generate_balanced.py +439 -0
- package/scripts/train/v2/generate_hard_negatives.py +219 -0
- package/scripts/train/v2/infer.py +149 -36
- package/scripts/train/v2/infer_server.py +224 -0
- package/scripts/train/v2/online_train.py +576 -0
- package/scripts/train/v2/precompute.py +24 -6
- package/template/CLAUDE.md +74 -132
|
@@ -347,15 +347,19 @@ export default async function jflExtension(pi: any): Promise<void> {
|
|
|
347
347
|
content: [
|
|
348
348
|
`JFL session ready: "${projectName}" on branch ${ctx.session.branch}.`,
|
|
349
349
|
"",
|
|
350
|
-
"
|
|
351
|
-
"
|
|
350
|
+
"Your system prompt already contains full project context: CLAUDE.md, recent journal entries, knowledge docs, and code headers.",
|
|
351
|
+
"Everything was loaded during the boot screen. You do NOT need to call any tools.",
|
|
352
352
|
"",
|
|
353
|
-
"
|
|
354
|
-
"
|
|
353
|
+
"DO NOT call jfl_context, jfl_hud, jfl_memory_search, or run any bash commands at startup.",
|
|
354
|
+
"DO NOT run session-sync.sh, jfl-doctor.sh, or read journal files.",
|
|
355
|
+
"All of that is already done and injected into your system prompt.",
|
|
356
|
+
"",
|
|
357
|
+
"Just greet the user naturally with a brief status (3-5 lines):",
|
|
358
|
+
"- What was worked on recently (from the journal entries in your system prompt)",
|
|
355
359
|
"- Current phase and any blockers",
|
|
356
360
|
"- A suggested next action",
|
|
357
361
|
"",
|
|
358
|
-
"
|
|
362
|
+
"No tool calls. No setup noise. Just talk.",
|
|
359
363
|
"Write journal entries as you work. Capture decisions immediately.",
|
|
360
364
|
].join("\n"),
|
|
361
365
|
display: false,
|
|
@@ -380,7 +384,31 @@ export default async function jflExtension(pi: any): Promise<void> {
|
|
|
380
384
|
latestPiCtx = piCtx
|
|
381
385
|
const result = await injectContext(ctx, event)
|
|
382
386
|
if (result?.systemPromptAddition) {
|
|
383
|
-
|
|
387
|
+
let current = piCtx.getSystemPrompt?.() ?? ""
|
|
388
|
+
|
|
389
|
+
// Strip Path B (Claude Code manual startup) from system prompt.
|
|
390
|
+
// We're running in Pi with the extension — Path B instructions are
|
|
391
|
+
// noise that can confuse the LLM into running manual startup commands.
|
|
392
|
+
const pathBStart = "### Path B: Claude Code / No Extension"
|
|
393
|
+
const pathBEnd = "### How to Tell Which Path You're On"
|
|
394
|
+
const startIdx = current.indexOf(pathBStart)
|
|
395
|
+
const endIdx = current.indexOf(pathBEnd)
|
|
396
|
+
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
|
397
|
+
current = current.slice(0, startIdx) + current.slice(endIdx)
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Also strip the "How to Tell" section — it references Path B
|
|
401
|
+
const howToTell = "### How to Tell Which Path You're On"
|
|
402
|
+
const howToTellIdx = current.indexOf(howToTell)
|
|
403
|
+
if (howToTellIdx !== -1) {
|
|
404
|
+
// Find the next ### or ## heading after it
|
|
405
|
+
const afterHowToTell = current.slice(howToTellIdx + howToTell.length)
|
|
406
|
+
const nextHeading = afterHowToTell.search(/\n###? /)
|
|
407
|
+
if (nextHeading !== -1) {
|
|
408
|
+
current = current.slice(0, howToTellIdx) + afterHowToTell.slice(nextHeading)
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
384
412
|
return {
|
|
385
413
|
systemPrompt: current
|
|
386
414
|
? `${current}\n\n${result.systemPromptAddition}`
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
# Physical-World Telemetry Dashboard
|
|
4
|
+
# Quick overview of telemetry monitoring capabilities
|
|
5
|
+
|
|
6
|
+
set -euo pipefail
|
|
7
|
+
|
|
8
|
+
echo "🌍 Physical-World Telemetry Dashboard"
|
|
9
|
+
echo "====================================="
|
|
10
|
+
echo
|
|
11
|
+
|
|
12
|
+
# Check if telemetry is enabled
|
|
13
|
+
echo "📊 Telemetry Status:"
|
|
14
|
+
jfl telemetry status
|
|
15
|
+
echo
|
|
16
|
+
|
|
17
|
+
# Generate some sample data if none exists
|
|
18
|
+
echo "🧪 Generating sample data..."
|
|
19
|
+
jfl telemetry test --count 3 --interval 100 --categories "command,performance"
|
|
20
|
+
echo
|
|
21
|
+
|
|
22
|
+
# Show current snapshot
|
|
23
|
+
echo "📈 Current Physical-World Metrics:"
|
|
24
|
+
jfl telemetry monitor --compact
|
|
25
|
+
echo
|
|
26
|
+
|
|
27
|
+
echo "🔍 Available Monitoring Options:"
|
|
28
|
+
echo " jfl telemetry monitor --live # Real-time updates"
|
|
29
|
+
echo " jfl telemetry monitor --filter cpu # Filter for CPU-related events"
|
|
30
|
+
echo " jfl telemetry monitor --compact # Compact display"
|
|
31
|
+
echo
|
|
32
|
+
|
|
33
|
+
echo "💡 Physical-World Data Captured:"
|
|
34
|
+
echo " • System: CPU usage, memory, thermal state, system load"
|
|
35
|
+
echo " • Hardware: battery level, disk space, sensors"
|
|
36
|
+
echo " • Workflow: git branch, dirty files, terminal count, IDE"
|
|
37
|
+
echo " • Network: latency, connectivity, local dev servers"
|
|
38
|
+
echo " • Performance: build times, test durations, command timing"
|
|
39
|
+
echo
|
|
40
|
+
|
|
41
|
+
echo "🎯 Competitive Advantage:"
|
|
42
|
+
echo " This data is impossible for web UIs to access!"
|
|
43
|
+
echo " You own the edge - local machine context during live operations."
|
|
44
|
+
echo
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end test for the Planning Loop
|
|
3
|
+
*
|
|
4
|
+
* Exercises: PolicyHead v2 → DynamicsModel rollouts → InvariantMonitor → Action Selection
|
|
5
|
+
*
|
|
6
|
+
* Run: npx tsx scripts/test-planning-loop-e2e.ts
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { createPlanningLoop } from "../src/lib/planning-loop.js"
|
|
10
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
11
|
+
import type { PlanningResult, EvaluatedAction } from "../src/lib/planning-loop.js"
|
|
12
|
+
|
|
13
|
+
const projectRoot = process.cwd()
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Test scenarios — represent real situations Peter Parker faces
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
const SCENARIOS = [
|
|
20
|
+
{
|
|
21
|
+
name: "Bug reported — tests failing",
|
|
22
|
+
agentId: "error-fixer",
|
|
23
|
+
goal: "Tests are failing in planning-loop.test.ts — TypeError on undefined property. Fix the failing test.",
|
|
24
|
+
expectedTypes: ["fix"],
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
name: "Feature request — add new capability",
|
|
28
|
+
agentId: "feature-builder",
|
|
29
|
+
goal: "Add multi-step rollout support to the planning loop for deeper lookahead.",
|
|
30
|
+
expectedTypes: ["feature"],
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: "Performance optimization",
|
|
34
|
+
agentId: "optimizer",
|
|
35
|
+
goal: "Reduce PolicyHead inference latency from 2s to under 500ms for interactive use.",
|
|
36
|
+
expectedTypes: ["experiment", "refactor"],
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
name: "Test coverage gap",
|
|
40
|
+
agentId: "test-coverage",
|
|
41
|
+
goal: "Add unit tests for the counterfactual training bridge — currently 0% coverage.",
|
|
42
|
+
expectedTypes: ["test"],
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
name: "Config change needed",
|
|
46
|
+
agentId: "config-updater",
|
|
47
|
+
goal: "Update the nightly pipeline schedule to run at 2am MST instead of midnight.",
|
|
48
|
+
expectedTypes: ["config"],
|
|
49
|
+
},
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
// ============================================================================
|
|
53
|
+
// Helpers
|
|
54
|
+
// ============================================================================
|
|
55
|
+
|
|
56
|
+
function formatAction(ea: EvaluatedAction): string {
|
|
57
|
+
const pred = ea.prediction
|
|
58
|
+
const delta = pred.outcome?.immediate?.evalScoreChange
|
|
59
|
+
const deltaStr = delta !== undefined ? `Δ=${delta > 0 ? "+" : ""}${delta.toFixed(4)}` : "Δ=N/A"
|
|
60
|
+
const violations = ea.invariantViolations.length > 0 ? ` ⚠️${ea.invariantViolations.length} violations` : ""
|
|
61
|
+
const filtered = ea.filtered ? ` [FILTERED: ${ea.filterReason}]` : ""
|
|
62
|
+
|
|
63
|
+
return ` ${ea.action.actionType.padEnd(12)} conf=${ea.phConfidence.toFixed(3)} score=${ea.combinedScore.toFixed(3)} ${deltaStr} src=${pred.source}${violations}${filtered}`
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function logResult(scenario: typeof SCENARIOS[0], result: PlanningResult) {
|
|
67
|
+
console.log(`\n${"═".repeat(70)}`)
|
|
68
|
+
console.log(` Scenario: ${scenario.name}`)
|
|
69
|
+
console.log(` Agent: ${scenario.agentId}`)
|
|
70
|
+
console.log(` Goal: ${scenario.goal.slice(0, 70)}...`)
|
|
71
|
+
console.log(`${"─".repeat(70)}`)
|
|
72
|
+
console.log(` Time: ${result.planningTimeMs}ms`)
|
|
73
|
+
console.log(` Rollouts: ${result.rolloutsPerformed}`)
|
|
74
|
+
console.log(` Complete: ${result.completed}`)
|
|
75
|
+
console.log()
|
|
76
|
+
|
|
77
|
+
// All actions
|
|
78
|
+
console.log(` Actions evaluated (${result.allActions.length}):`)
|
|
79
|
+
for (const ea of result.allActions) {
|
|
80
|
+
const selected = result.selectedAction === ea ? " ← SELECTED" : ""
|
|
81
|
+
console.log(`${formatAction(ea)}${selected}`)
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Selected action
|
|
85
|
+
if (result.selectedAction) {
|
|
86
|
+
const sel = result.selectedAction
|
|
87
|
+
const correctType = scenario.expectedTypes.includes(sel.action.actionType)
|
|
88
|
+
const check = correctType ? "✅" : "❌"
|
|
89
|
+
console.log(`\n ${check} Selected: ${sel.action.actionType} (expected: ${scenario.expectedTypes.join("|")})`)
|
|
90
|
+
} else {
|
|
91
|
+
console.log(`\n ❌ No action selected: ${result.noSelectionReason}`)
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// ============================================================================
|
|
96
|
+
// Main
|
|
97
|
+
// ============================================================================
|
|
98
|
+
|
|
99
|
+
async function main() {
|
|
100
|
+
console.log("╔══════════════════════════════════════════════════════════════════════╗")
|
|
101
|
+
console.log("║ Planning Loop — End-to-End Test ║")
|
|
102
|
+
console.log("║ PH (v2 transformer) → DM (rollouts) → IM (invariants) → Select ║")
|
|
103
|
+
console.log("╚══════════════════════════════════════════════════════════════════════╝")
|
|
104
|
+
|
|
105
|
+
// Check prerequisites
|
|
106
|
+
const ph = new PolicyHeadInference(projectRoot)
|
|
107
|
+
console.log(`\n PolicyHead loaded: ${ph.isLoaded} (version: ${ph.version})`)
|
|
108
|
+
if (ph.stats) {
|
|
109
|
+
console.log(` Trained on: ${ph.stats.trained_on} examples`)
|
|
110
|
+
const valAcc = ph.stats.val_accuracy ?? ph.stats.direction_accuracy
|
|
111
|
+
console.log(` Val accuracy: ${(valAcc > 1 ? valAcc : valAcc * 100).toFixed(1)}%`)
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
if (!ph.isLoaded) {
|
|
115
|
+
console.error("\n ❌ PolicyHead not loaded — cannot run e2e test")
|
|
116
|
+
console.error(" Copy checkpoint to .jfl/checkpoints/best_policy_head.pt")
|
|
117
|
+
process.exit(1)
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Create planning loop
|
|
121
|
+
const planner = createPlanningLoop(projectRoot, {
|
|
122
|
+
topK: 5,
|
|
123
|
+
verbose: true,
|
|
124
|
+
maxPlanningTimeMs: 30000, // 30s for test
|
|
125
|
+
checkInvariants: true,
|
|
126
|
+
recordTransitions: false, // Don't pollute real data during test
|
|
127
|
+
writeTrainingTuples: false,
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
console.log(` Planning loop ready: ${planner.isReady}`)
|
|
131
|
+
|
|
132
|
+
// Run each scenario
|
|
133
|
+
let passed = 0
|
|
134
|
+
let failed = 0
|
|
135
|
+
const results: Array<{ scenario: string; result: PlanningResult; correct: boolean }> = []
|
|
136
|
+
|
|
137
|
+
for (const scenario of SCENARIOS) {
|
|
138
|
+
try {
|
|
139
|
+
console.log(`\n Running: ${scenario.name}...`)
|
|
140
|
+
const result = await planner.plan(scenario.agentId, scenario.goal)
|
|
141
|
+
logResult(scenario, result)
|
|
142
|
+
|
|
143
|
+
const correct = result.selectedAction
|
|
144
|
+
? scenario.expectedTypes.includes(result.selectedAction.action.actionType)
|
|
145
|
+
: false
|
|
146
|
+
|
|
147
|
+
if (correct) passed++
|
|
148
|
+
else failed++
|
|
149
|
+
|
|
150
|
+
results.push({ scenario: scenario.name, result, correct })
|
|
151
|
+
} catch (err: any) {
|
|
152
|
+
console.error(`\n ❌ ${scenario.name} THREW: ${err.message}`)
|
|
153
|
+
failed++
|
|
154
|
+
results.push({
|
|
155
|
+
scenario: scenario.name,
|
|
156
|
+
result: null as any,
|
|
157
|
+
correct: false,
|
|
158
|
+
})
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Summary
|
|
163
|
+
console.log(`\n${"═".repeat(70)}`)
|
|
164
|
+
console.log(` SUMMARY`)
|
|
165
|
+
console.log(`${"─".repeat(70)}`)
|
|
166
|
+
for (const r of results) {
|
|
167
|
+
const check = r.correct ? "✅" : "❌"
|
|
168
|
+
const time = r.result ? `${r.result.planningTimeMs}ms` : "ERRORED"
|
|
169
|
+
const selected = r.result?.selectedAction?.action.actionType ?? "none"
|
|
170
|
+
console.log(` ${check} ${r.scenario.padEnd(35)} → ${selected.padEnd(12)} (${time})`)
|
|
171
|
+
}
|
|
172
|
+
console.log(`\n Passed: ${passed}/${SCENARIOS.length} Failed: ${failed}/${SCENARIOS.length}`)
|
|
173
|
+
console.log(`${"═".repeat(70)}`)
|
|
174
|
+
|
|
175
|
+
process.exit(failed > 0 ? 1 : 0)
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
main().catch(err => {
|
|
179
|
+
console.error("Fatal error:", err)
|
|
180
|
+
process.exit(1)
|
|
181
|
+
})
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Test the inference server path in PolicyHead
|
|
4
|
+
*/
|
|
5
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
6
|
+
|
|
7
|
+
async function main() {
|
|
8
|
+
const ph = new PolicyHeadInference(".")
|
|
9
|
+
console.log("PH loaded:", ph.isLoaded, "version:", ph.version)
|
|
10
|
+
|
|
11
|
+
if (!ph.isLoaded || ph.version !== 2) {
|
|
12
|
+
console.log("v2 not loaded, aborting")
|
|
13
|
+
process.exit(1)
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
const state = {
|
|
17
|
+
composite_score: 0.72,
|
|
18
|
+
dimension_scores: { test_pass_rate: 0.85, build_health: 0.9, code_quality: 0.8 },
|
|
19
|
+
tests_passing: 17,
|
|
20
|
+
tests_total: 20,
|
|
21
|
+
trajectory_length: 3,
|
|
22
|
+
recent_deltas: [-0.03, 0.01],
|
|
23
|
+
agent: "error-fixer",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const goals = [
|
|
27
|
+
"Fix failing tests in planning-loop.test.ts — TypeError on undefined property",
|
|
28
|
+
"Add multi-step rollout support to the planning loop",
|
|
29
|
+
"Add unit tests for the counterfactual training bridge",
|
|
30
|
+
"Update the nightly pipeline schedule to run at 2am MST",
|
|
31
|
+
"Reduce PolicyHead inference latency from 20s to under 5s",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
for (const goal of goals) {
|
|
35
|
+
const t0 = Date.now()
|
|
36
|
+
const result = await ph.selectAction(state, goal)
|
|
37
|
+
const elapsed = Date.now() - t0
|
|
38
|
+
console.log(`[${elapsed}ms] ${result.action} (${(result.confidence * 100).toFixed(1)}%) ← ${goal.slice(0, 60)}`)
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
console.log("\nStopping server...")
|
|
42
|
+
ph.stopServer()
|
|
43
|
+
console.log("Done")
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
main().catch(err => {
|
|
47
|
+
console.error(err)
|
|
48
|
+
process.exit(1)
|
|
49
|
+
})
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Test how state values affect PolicyHead predictions
|
|
4
|
+
*/
|
|
5
|
+
import { PolicyHeadInference } from "../src/lib/policy-head.js"
|
|
6
|
+
|
|
7
|
+
async function main() {
|
|
8
|
+
const ph = new PolicyHeadInference(".")
|
|
9
|
+
if (!ph.isLoaded || ph.version !== 2) { process.exit(1) }
|
|
10
|
+
|
|
11
|
+
const goal = "Add multi-step rollout support to the planning loop"
|
|
12
|
+
|
|
13
|
+
const states = [
|
|
14
|
+
{ label: "healthy (all high)", composite_score: 0.95, dimension_scores: { test_pass_rate: 1.0, build_health: 1.0, code_quality: 0.95 }, tests_passing: 25, tests_total: 25, trajectory_length: 5, recent_deltas: [0.02, 0.01], agent: "feature-builder" },
|
|
15
|
+
{ label: "degraded tests", composite_score: 0.72, dimension_scores: { test_pass_rate: 0.85, build_health: 0.9, code_quality: 0.8 }, tests_passing: 17, tests_total: 20, trajectory_length: 3, recent_deltas: [-0.03], agent: "error-fixer" },
|
|
16
|
+
{ label: "low coverage", composite_score: 0.55, dimension_scores: { test_pass_rate: 1.0, build_health: 0.9, code_quality: 0.5 }, tests_passing: 15, tests_total: 15, trajectory_length: 2, recent_deltas: [0.01], agent: "test-coverage" },
|
|
17
|
+
{ label: "fresh start", composite_score: 0.3, dimension_scores: { test_pass_rate: 0.5, build_health: 0.5, code_quality: 0.5 }, tests_passing: 5, tests_total: 10, trajectory_length: 0, recent_deltas: [], agent: "onboarding" },
|
|
18
|
+
{ label: "near perfect", composite_score: 0.98, dimension_scores: { test_pass_rate: 1.0, build_health: 1.0, code_quality: 1.0, hub_health: 1.0 }, tests_passing: 30, tests_total: 30, trajectory_length: 10, recent_deltas: [0.005, 0.003], agent: "optimizer" },
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
for (const s of states) {
|
|
22
|
+
const { label, ...state } = s
|
|
23
|
+
const t0 = Date.now()
|
|
24
|
+
const result = await ph.selectAction(state as any, goal)
|
|
25
|
+
const elapsed = Date.now() - t0
|
|
26
|
+
const alts = result.alternatives?.map(a => `${a.action}(${(a.confidence*100).toFixed(0)}%)`).join(", ") || ""
|
|
27
|
+
console.log(`[${elapsed}ms] ${label}: ${result.action} (${(result.confidence*100).toFixed(1)}%) | alts: ${alts}`)
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
ph.stopServer()
|
|
31
|
+
}
|
|
32
|
+
main().catch(err => { console.error(err); process.exit(1) })
|