jfl 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +1 -0
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +30 -1
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/ide.d.ts +2 -1
- package/dist/commands/ide.d.ts.map +1 -1
- package/dist/commands/ide.js +60 -1
- package/dist/commands/ide.js.map +1 -1
- package/dist/commands/init-from-service.d.ts +15 -0
- package/dist/commands/init-from-service.d.ts.map +1 -0
- package/dist/commands/init-from-service.js +541 -0
- package/dist/commands/init-from-service.js.map +1 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +32 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/kanban.d.ts.map +1 -1
- package/dist/commands/kanban.js +13 -4
- package/dist/commands/kanban.js.map +1 -1
- package/dist/commands/linear.d.ts +41 -0
- package/dist/commands/linear.d.ts.map +1 -0
- package/dist/commands/linear.js +715 -0
- package/dist/commands/linear.js.map +1 -0
- package/dist/commands/peter.d.ts.map +1 -1
- package/dist/commands/peter.js +232 -25
- package/dist/commands/peter.js.map +1 -1
- package/dist/commands/services.d.ts.map +1 -1
- package/dist/commands/services.js +146 -0
- package/dist/commands/services.js.map +1 -1
- package/dist/commands/setup.d.ts.map +1 -1
- package/dist/commands/setup.js +173 -13
- package/dist/commands/setup.js.map +1 -1
- package/dist/commands/telemetry-monitor.d.ts +11 -0
- package/dist/commands/telemetry-monitor.d.ts.map +1 -0
- package/dist/commands/telemetry-monitor.js +224 -0
- package/dist/commands/telemetry-monitor.js.map +1 -0
- package/dist/commands/telemetry-test.d.ts +11 -0
- package/dist/commands/telemetry-test.d.ts.map +1 -0
- package/dist/commands/telemetry-test.js +67 -0
- package/dist/commands/telemetry-test.js.map +1 -0
- package/dist/commands/tenet-agents.d.ts +13 -0
- package/dist/commands/tenet-agents.d.ts.map +1 -0
- package/dist/commands/tenet-agents.js +191 -0
- package/dist/commands/tenet-agents.js.map +1 -0
- package/dist/commands/tenet-setup.d.ts +19 -0
- package/dist/commands/tenet-setup.d.ts.map +1 -0
- package/dist/commands/tenet-setup.js +131 -0
- package/dist/commands/tenet-setup.js.map +1 -0
- package/dist/commands/train.d.ts +18 -0
- package/dist/commands/train.d.ts.map +1 -1
- package/dist/commands/train.js +182 -0
- package/dist/commands/train.js.map +1 -1
- package/dist/commands/whoami.d.ts +2 -0
- package/dist/commands/whoami.d.ts.map +1 -0
- package/dist/commands/whoami.js +24 -0
- package/dist/commands/whoami.js.map +1 -0
- package/dist/index.js +159 -10
- package/dist/index.js.map +1 -1
- package/dist/lib/advanced-setup.d.ts +78 -0
- package/dist/lib/advanced-setup.d.ts.map +1 -0
- package/dist/lib/advanced-setup.js +433 -0
- package/dist/lib/advanced-setup.js.map +1 -0
- package/dist/lib/agent-config.d.ts +33 -0
- package/dist/lib/agent-config.d.ts.map +1 -1
- package/dist/lib/agent-config.js +26 -0
- package/dist/lib/agent-config.js.map +1 -1
- package/dist/lib/counterfactual-training-bridge.d.ts +114 -0
- package/dist/lib/counterfactual-training-bridge.d.ts.map +1 -0
- package/dist/lib/counterfactual-training-bridge.js +322 -0
- package/dist/lib/counterfactual-training-bridge.js.map +1 -0
- package/dist/lib/discovery-agent.d.ts +48 -0
- package/dist/lib/discovery-agent.d.ts.map +1 -0
- package/dist/lib/discovery-agent.js +111 -0
- package/dist/lib/discovery-agent.js.map +1 -0
- package/dist/lib/flow-engine.d.ts.map +1 -1
- package/dist/lib/flow-engine.js +46 -8
- package/dist/lib/flow-engine.js.map +1 -1
- package/dist/lib/gtm-generator.d.ts +29 -0
- package/dist/lib/gtm-generator.d.ts.map +1 -0
- package/dist/lib/gtm-generator.js +252 -0
- package/dist/lib/gtm-generator.js.map +1 -0
- package/dist/lib/hub-health.d.ts +40 -0
- package/dist/lib/hub-health.d.ts.map +1 -0
- package/dist/lib/hub-health.js +89 -0
- package/dist/lib/hub-health.js.map +1 -0
- package/dist/lib/invariant-monitor.d.ts +6 -2
- package/dist/lib/invariant-monitor.d.ts.map +1 -1
- package/dist/lib/invariant-monitor.js +89 -2
- package/dist/lib/invariant-monitor.js.map +1 -1
- package/dist/lib/journal-analyzer.d.ts +71 -0
- package/dist/lib/journal-analyzer.d.ts.map +1 -0
- package/dist/lib/journal-analyzer.js +306 -0
- package/dist/lib/journal-analyzer.js.map +1 -0
- package/dist/lib/linear-client.d.ts +73 -0
- package/dist/lib/linear-client.d.ts.map +1 -0
- package/dist/lib/linear-client.js +112 -0
- package/dist/lib/linear-client.js.map +1 -0
- package/dist/lib/linear-id-map.d.ts +20 -0
- package/dist/lib/linear-id-map.d.ts.map +1 -0
- package/dist/lib/linear-id-map.js +57 -0
- package/dist/lib/linear-id-map.js.map +1 -0
- package/dist/lib/linear-kanban.d.ts +66 -0
- package/dist/lib/linear-kanban.d.ts.map +1 -0
- package/dist/lib/linear-kanban.js +175 -0
- package/dist/lib/linear-kanban.js.map +1 -0
- package/dist/lib/onboarding.d.ts +40 -0
- package/dist/lib/onboarding.d.ts.map +1 -0
- package/dist/lib/onboarding.js +213 -0
- package/dist/lib/onboarding.js.map +1 -0
- package/dist/lib/physical-world-model.d.ts +50 -0
- package/dist/lib/physical-world-model.d.ts.map +1 -0
- package/dist/lib/physical-world-model.js +251 -0
- package/dist/lib/physical-world-model.js.map +1 -0
- package/dist/lib/planning-loop.d.ts +157 -0
- package/dist/lib/planning-loop.d.ts.map +1 -0
- package/dist/lib/planning-loop.js +537 -0
- package/dist/lib/planning-loop.js.map +1 -0
- package/dist/lib/policy-head.d.ts +13 -0
- package/dist/lib/policy-head.d.ts.map +1 -1
- package/dist/lib/policy-head.js +168 -2
- package/dist/lib/policy-head.js.map +1 -1
- package/dist/lib/resource-optimizer-middleware.d.ts +39 -0
- package/dist/lib/resource-optimizer-middleware.d.ts.map +1 -0
- package/dist/lib/resource-optimizer-middleware.js +222 -0
- package/dist/lib/resource-optimizer-middleware.js.map +1 -0
- package/dist/lib/resource-optimizer.d.ts +71 -0
- package/dist/lib/resource-optimizer.d.ts.map +1 -0
- package/dist/lib/resource-optimizer.js +228 -0
- package/dist/lib/resource-optimizer.js.map +1 -0
- package/dist/lib/rl-manager.d.ts +74 -0
- package/dist/lib/rl-manager.d.ts.map +1 -0
- package/dist/lib/rl-manager.js +244 -0
- package/dist/lib/rl-manager.js.map +1 -0
- package/dist/lib/service-analyzer.d.ts +76 -0
- package/dist/lib/service-analyzer.d.ts.map +1 -0
- package/dist/lib/service-analyzer.js +704 -0
- package/dist/lib/service-analyzer.js.map +1 -0
- package/dist/lib/service-gtm.js +2 -2
- package/dist/lib/service-gtm.js.map +1 -1
- package/dist/lib/service-questionnaire.d.ts +11 -0
- package/dist/lib/service-questionnaire.d.ts.map +1 -0
- package/dist/lib/service-questionnaire.js +89 -0
- package/dist/lib/service-questionnaire.js.map +1 -0
- package/dist/lib/setup/agent-generator.d.ts +2 -0
- package/dist/lib/setup/agent-generator.d.ts.map +1 -1
- package/dist/lib/setup/agent-generator.js +128 -4
- package/dist/lib/setup/agent-generator.js.map +1 -1
- package/dist/lib/setup/flow-generator.d.ts +10 -0
- package/dist/lib/setup/flow-generator.d.ts.map +1 -0
- package/dist/lib/setup/flow-generator.js +113 -0
- package/dist/lib/setup/flow-generator.js.map +1 -0
- package/dist/lib/setup/invariant-bridge.d.ts +91 -0
- package/dist/lib/setup/invariant-bridge.d.ts.map +1 -0
- package/dist/lib/setup/invariant-bridge.js +384 -0
- package/dist/lib/setup/invariant-bridge.js.map +1 -0
- package/dist/lib/setup/spec-generator.d.ts +41 -5
- package/dist/lib/setup/spec-generator.d.ts.map +1 -1
- package/dist/lib/setup/spec-generator.js +503 -29
- package/dist/lib/setup/spec-generator.js.map +1 -1
- package/dist/lib/stratus-client.js +1 -1
- package/dist/lib/stratus-client.js.map +1 -1
- package/dist/lib/surface-agent.d.ts +78 -0
- package/dist/lib/surface-agent.d.ts.map +1 -0
- package/dist/lib/surface-agent.js +105 -0
- package/dist/lib/surface-agent.js.map +1 -0
- package/dist/lib/surface-coordination-example.d.ts +30 -0
- package/dist/lib/surface-coordination-example.d.ts.map +1 -0
- package/dist/lib/surface-coordination-example.js +164 -0
- package/dist/lib/surface-coordination-example.js.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts +15 -0
- package/dist/lib/telemetry/physical-world-collector.d.ts.map +1 -0
- package/dist/lib/telemetry/physical-world-collector.js +177 -0
- package/dist/lib/telemetry/physical-world-collector.js.map +1 -0
- package/dist/lib/telemetry/training-bridge.d.ts +51 -0
- package/dist/lib/telemetry/training-bridge.d.ts.map +1 -0
- package/dist/lib/telemetry/training-bridge.js +185 -0
- package/dist/lib/telemetry/training-bridge.js.map +1 -0
- package/dist/lib/telemetry.d.ts +2 -1
- package/dist/lib/telemetry.d.ts.map +1 -1
- package/dist/lib/telemetry.js +23 -2
- package/dist/lib/telemetry.js.map +1 -1
- package/dist/lib/tenet-board-agent.d.ts +52 -0
- package/dist/lib/tenet-board-agent.d.ts.map +1 -0
- package/dist/lib/tenet-board-agent.js +226 -0
- package/dist/lib/tenet-board-agent.js.map +1 -0
- package/dist/lib/tenet-ide-agent.d.ts +40 -0
- package/dist/lib/tenet-ide-agent.d.ts.map +1 -0
- package/dist/lib/tenet-ide-agent.js +199 -0
- package/dist/lib/tenet-ide-agent.js.map +1 -0
- package/dist/lib/workspace/data-pipeline.d.ts.map +1 -1
- package/dist/lib/workspace/data-pipeline.js +27 -5
- package/dist/lib/workspace/data-pipeline.js.map +1 -1
- package/dist/lib/workspace/sidebar-runner.d.ts +13 -0
- package/dist/lib/workspace/sidebar-runner.d.ts.map +1 -0
- package/dist/lib/workspace/sidebar-runner.js +419 -0
- package/dist/lib/workspace/sidebar-runner.js.map +1 -0
- package/dist/lib/workspace/surface-registry.d.ts.map +1 -1
- package/dist/lib/workspace/surface-registry.js +4 -1
- package/dist/lib/workspace/surface-registry.js.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.d.ts +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/agent-overview.js +3 -3
- package/dist/lib/workspace/surfaces/agent-overview.js.map +1 -1
- package/dist/lib/workspace/surfaces/index.d.ts +3 -0
- package/dist/lib/workspace/surfaces/index.d.ts.map +1 -1
- package/dist/lib/workspace/surfaces/index.js +3 -0
- package/dist/lib/workspace/surfaces/index.js.map +1 -1
- package/dist/lib/workspace/surfaces/kanban.d.ts +15 -0
- package/dist/lib/workspace/surfaces/kanban.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/kanban.js +43 -0
- package/dist/lib/workspace/surfaces/kanban.js.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts +15 -0
- package/dist/lib/workspace/surfaces/physical-world.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/physical-world.js +37 -0
- package/dist/lib/workspace/surfaces/physical-world.js.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts +22 -0
- package/dist/lib/workspace/surfaces/sidebar.d.ts.map +1 -0
- package/dist/lib/workspace/surfaces/sidebar.js +90 -0
- package/dist/lib/workspace/surfaces/sidebar.js.map +1 -0
- package/dist/types/flows.d.ts +2 -1
- package/dist/types/flows.d.ts.map +1 -1
- package/dist/types/physical-world-model.d.ts +65 -0
- package/dist/types/physical-world-model.d.ts.map +1 -0
- package/dist/types/physical-world-model.js +43 -0
- package/dist/types/physical-world-model.js.map +1 -0
- package/dist/types/telemetry.d.ts +37 -0
- package/dist/types/telemetry.d.ts.map +1 -1
- package/dist/types/world-model.d.ts.map +1 -1
- package/dist/types/world-model.js +14 -7
- package/dist/types/world-model.js.map +1 -1
- package/dist/utils/context-hub-port.d.ts.map +1 -1
- package/dist/utils/context-hub-port.js +6 -1
- package/dist/utils/context-hub-port.js.map +1 -1
- package/package.json +3 -2
- package/packages/pi/extensions/index.ts +34 -6
- package/scripts/telemetry-dashboard.sh +44 -0
- package/scripts/test-planning-loop-e2e.ts +181 -0
- package/scripts/test-server-inference.ts +49 -0
- package/scripts/test-state-sensitivity.ts +32 -0
- package/scripts/train/v2/benchmark.py +661 -0
- package/scripts/train/v2/generate_balanced.py +439 -0
- package/scripts/train/v2/generate_hard_negatives.py +219 -0
- package/scripts/train/v2/infer.py +149 -36
- package/scripts/train/v2/infer_server.py +224 -0
- package/scripts/train/v2/online_train.py +576 -0
- package/scripts/train/v2/precompute.py +24 -6
- package/template/CLAUDE.md +74 -132
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Generate hard negative training examples to address benchmark gaps.
|
|
3
|
+
|
|
4
|
+
Targets specific confusion patterns:
|
|
5
|
+
1. add_feature: model confuses with optimize_performance, add_tests, update_docs
|
|
6
|
+
2. fix_bug on infrastructure: model confuses with dependency_update, data_pipeline
|
|
7
|
+
3. security_hardening overfit: model incorrectly predicts security for refactor/deps/docs
|
|
8
|
+
4. refactor_code vs data_pipeline confusion
|
|
9
|
+
|
|
10
|
+
Strategy: create examples that LOOK like the wrong class but ARE the right class.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import random
|
|
16
|
+
import argparse
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Hand-written hard-negative examples, grouped by the tool that is the CORRECT
# label for the goal text. Each key is emitted as the example's "correct_tool".
# Each entry is a 3-tuple unpacked by main() as (goal, agent, composite):
#   goal      - free-text task description written to the example's "goal" field
#   agent     - agent name passed to generate_state() for the "Agent:" line
#   composite - float passed to generate_state() for the "Composite:" line; it
#               also selects which test pass-rate range is sampled there
HARD_NEGATIVES = {
    # ── add_feature: things that SOUND like optimization/testing/docs but are features ──
    "add_feature": [
        # These sound like optimization but are actually new features
        ("Add caching layer for embedding lookups", "feature-builder", 0.80),
        ("Implement connection pooling for Stratus API client", "feature-builder", 0.78),
        ("Build batch inference mode for PolicyHead", "feature-builder", 0.82),
        ("Create worker thread pool for parallel eval execution", "feature-builder", 0.75),
        ("Add streaming response support to hub WebSocket API", "feature-builder", 0.80),
        ("Implement lazy loading for heavy module imports", "feature-builder", 0.77),
        # These sound like tests but are actually features
        ("Build benchmark scenario runner with graded reports", "feature-builder", 0.85),
        ("Create synthetic data generator for training pipeline", "feature-builder", 0.82),
        ("Implement validation harness for counterfactual predictions", "feature-builder", 0.79),
        ("Add smoke test CLI command for quick system health checks", "feature-builder", 0.81),
        # These sound like docs but are actually features
        ("Build interactive API documentation server", "feature-builder", 0.76),
        ("Create changelog generator from git history", "feature-builder", 0.78),
        ("Implement decision record template system", "feature-builder", 0.80),
        # These are clearly features
        ("Add planning loop that connects PolicyHead to DynamicsModel for rollout simulation", "feature-builder", 0.88),
        ("Implement experience replay buffer for online learning with reservoir sampling", "feature-builder", 0.85),
        ("Build counterfactual training bridge that transforms CF scenarios into v2 tuples", "feature-builder", 0.87),
        ("Create multi-step rollout planner with beam search and pruning", "feature-builder", 0.83),
        ("Add P2P agent communication via Subway mesh network", "feature-builder", 0.86),
        ("Implement cost monitoring dashboard for Stratus API usage tracking", "feature-builder", 0.82),
        ("Build agent fleet management with VM spawning and tuple collection", "feature-builder", 0.84),
        ("Create evaluation pyramid with L1-L4 metrics tracking", "feature-builder", 0.80),
        ("Add CRM integration via Google Sheets CLI", "feature-builder", 0.79),
        ("Implement Linear issue sync with bidirectional updates", "feature-builder", 0.85),
        ("Build self-driving loop with cron-triggered autoresearch", "feature-builder", 0.88),
        ("Create agent orchestrator with model routing per role", "feature-builder", 0.87),
        ("Add journal system with per-session JSONL files and memory indexing", "feature-builder", 0.83),
        ("Implement TLA+ invariant monitor for runtime safety checks", "feature-builder", 0.81),
        ("Build state capture module that snapshots system state for dynamics learning", "feature-builder", 0.82),
        ("Create world model store with JSONL storage for transitions and predictions", "feature-builder", 0.80),
    ],

    # ── fix_bug on infrastructure crashes: NOT dependency_update or data_pipeline ──
    "fix_bug": [
        # Hub crashes — clearly bugs, not dependency issues
        ("Hub process crashes with SIGSEGV when receiving malformed WebSocket frame", "hub-sentinel", 0.35),
        ("Hub OOM killed after 6 hours — memory leak in event accumulator", "hub-sentinel", 0.30),
        ("100% agent stranding — hub connection pool exhausted, no new connections accepted", "hub-sentinel", 0.25),
        ("Hub crashes on startup when config.json has trailing comma", "hub-sentinel", 0.40),
        ("Agent session lost when hub restarts — state not persisted to disk", "hub-sentinel", 0.35),
        ("Hub WebSocket server fails to bind port — EADDRINUSE not handled", "hub-sentinel", 0.38),
        # Infrastructure bugs — not data pipeline issues
        ("Worktree allocation race condition — two agents get same worktree", "system-health", 0.42),
        ("File lock not released after agent crash — blocks subsequent agents", "system-health", 0.40),
        ("Git merge fails silently, produces empty commit with no changes", "system-health", 0.45),
        ("Session branch not cleaned up after merge — stale branches accumulate", "system-health", 0.48),
        ("Eval runner hangs when test script has infinite loop — no timeout enforcement", "error-fixer", 0.35),
        ("CLI crashes with stack overflow when .jfl directory contains circular symlinks", "error-fixer", 0.42),
        # Runtime errors — not config or dependency issues
        ("TypeError: Cannot read properties of undefined reading 'composite'", "error-fixer", 0.50),
        ("Unhandled promise rejection crashes Node.js process — no global handler", "error-fixer", 0.45),
        ("ENOENT error when accessing worktree that was garbage collected", "error-fixer", 0.48),
        ("JSON.parse fails on training buffer — corrupted entry at byte offset 34521", "error-fixer", 0.38),
        ("PolicyHead inference subprocess exits with code 139 — segfault in PyTorch", "error-fixer", 0.32),
        ("Race condition in concurrent file writes to training-buffer.jsonl", "error-fixer", 0.40),
        ("Build succeeds but tests fail because dist/ has stale compiled files", "test-coverage", 0.55),
        ("Flaky test passes 9/10 runs — timing-dependent assertion", "test-coverage", 0.60),
        ("CORS error blocks dashboard API calls — missing header in response", "error-fixer", 0.50),
        ("Memory allocation failure when loading 50MB checkpoint on low-RAM VM", "error-fixer", 0.35),
        ("Agent gets stuck in infinite retry loop when hub is unreachable", "hub-sentinel", 0.30),
        ("Graceful shutdown handler doesn't wait for in-flight requests", "hub-sentinel", 0.38),
        ("Hot reload breaks when file change event fires before write completes", "error-fixer", 0.45),
        ("Exit code 0 returned on failure — downstream scripts think it succeeded", "error-fixer", 0.50),
        ("UTC/local timezone confusion causes journal entries with future timestamps", "error-fixer", 0.55),
        ("Package.json scripts reference removed file — npm run breaks", "error-fixer", 0.52),
        ("Checkpoint loading fails silently and uses random weights instead", "error-fixer", 0.35),
        ("Request body parser rejects valid JSON with nested arrays > 3 levels", "error-fixer", 0.48),
    ],

    # ── NOT security_hardening — these LOOK like security but are other tools ──
    # (this heading covers the refactor_code, dependency_update and update_docs groups below)
    "refactor_code": [
        # These mention "clean up" or "restructure" which model confuses with security
        ("Clean up error handling — 5 different try/catch patterns across codebase", "code-quality", 0.75),
        ("Restructure authentication flow — too many layers of indirection", "code-quality", 0.72),
        ("Simplify access control logic — nested conditionals are unreadable", "code-quality", 0.70),
        ("Extract validation helpers — same input checking code in 8 places", "code-quality", 0.78),
        ("Consolidate logging — mixing console.log, winston, and pino", "code-quality", 0.73),
        ("Reduce coupling between auth module and user service", "code-quality", 0.71),
        ("Simplify config parsing — 200 lines of manual env var handling", "code-quality", 0.76),
        ("Break up god class that handles routing, auth, and business logic", "code-quality", 0.74),
        ("Replace manual string concatenation for SQL with query builder", "code-quality", 0.72),
        ("Extract shared HTTP client setup from 6 different service files", "code-quality", 0.75),
    ],

    "dependency_update": [
        # These mention "security" or "CVE" but the action is updating deps, not hardening
        ("npm audit shows 5 moderate vulnerabilities — update affected packages", "dependency-updater", 0.78),
        ("CVE-2026-9999 in lodash — bump to latest patched version", "dependency-updater", 0.82),
        ("Security advisory for express — update from 4.18 to 4.21", "dependency-updater", 0.80),
        ("Dependabot PR waiting for review — axios security update", "dependency-updater", 0.75),
        ("GitHub security alert on transitive dependency — update parent package", "dependency-updater", 0.77),
    ],

    "update_docs": [
        # These mention "security" concepts but the action is writing docs
        ("Document API authentication flow for new team members", "docs-updater", 0.80),
        ("Write security best practices guide for the codebase", "docs-updater", 0.78),
        ("Document secrets management process — which env vars, where stored", "docs-updater", 0.76),
        ("Add access control section to onboarding documentation", "docs-updater", 0.82),
        ("Write incident response playbook for hub crashes", "docs-updater", 0.79),
    ],

    # ── data_pipeline: distinguish from dependency_update and optimize_performance ──
    "data_pipeline": [
        ("Training buffer entries not flowing to v2 transform — pipeline stalled", "data-engineer", 0.50),
        ("Embedding cache miss rate at 40% — precompute step skipping new texts", "data-engineer", 0.55),
        ("Counterfactual scenarios not appearing in training buffer — bridge broken", "data-engineer", 0.48),
        ("JSONL corruption in transitions file — entries missing closing brace", "data-engineer", 0.45),
        ("Eval scored events not triggering training tuple creation", "data-engineer", 0.52),
        ("Duplicate entries in training buffer after fleet tuple collection", "data-engineer", 0.50),
        ("Data split not stratified — test set has 0 examples of fix_bug class", "data-engineer", 0.55),
        ("Nightly pipeline step 2 fails — transform can't read new action types", "data-engineer", 0.48),
        ("Mining journals produces tuples with empty reward fields", "data-engineer", 0.52),
        ("Training data lineage lost — can't trace which journal entry produced which tuple", "data-engineer", 0.50),
    ],
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def generate_state(agent: str, composite: float, rng: random.Random) -> str:
    """Render a randomized, multi-line textual system state for one example.

    Args:
        agent: Agent name, emitted verbatim on the ``Agent:`` line.
        composite: Composite score, emitted with four decimals; values above
            0.5 draw the test pass rate from [0.5, 1.0], otherwise [0.3, 0.8].
        rng: Random source. All randomness is drawn from it, so a seeded
            generator yields reproducible output.

    Returns:
        Six newline-joined lines: Agent, Composite, Tests, Trajectory,
        Dimensions and Recent deltas.
    """
    # NOTE: the order of rng calls below is part of the behavior — reordering
    # them would change the output produced for a given seed.
    total = rng.choice([15, 20, 25, 30, 35, 40, 45])
    if composite > 0.5:
        pass_rate = rng.uniform(0.5, 1.0)
    else:
        pass_rate = rng.uniform(0.3, 0.8)
    passing = int(total * pass_rate)
    trajectory = rng.randint(1, 10)

    available_dims = [
        "test_pass_rate", "build_health", "code_quality", "hub_health",
        "error_rate", "security_score", "observability", "pipeline_health",
        "doc_coverage", "maintainability", "test_coverage", "data_quality",
    ]
    # Pick 2-4 distinct dimensions, then assign each a random score.
    how_many = rng.randint(2, 4)
    chosen = rng.sample(available_dims, how_many)
    scores = {name: rng.uniform(0.2, 0.95) for name in chosen}
    dims_text = ", ".join(f"{name}={score:.4f}" for name, score in scores.items())

    # 1-4 recent deltas, each rendered with an explicit '+' when non-negative.
    delta_count = rng.randint(1, 4)
    rendered_deltas = []
    for _ in range(delta_count):
        delta = rng.uniform(-0.08, 0.06)
        sign = "" if delta < 0 else "+"
        rendered_deltas.append(f"{sign}{delta:.4f}")
    deltas_text = ", ".join(rendered_deltas)

    report = [
        f"Agent: {agent}",
        f"Composite: {composite:.4f}",
        f"Tests: {passing}/{total}",
        f"Trajectory: {trajectory}",
        f"Dimensions: {dims_text}",
        f"Recent deltas: {deltas_text}",
    ]
    return "\n".join(report)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def main():
    """CLI entry point: build hard-negative examples and append them to the splits.

    Reads ``--output`` (directory, default ``.jfl/v2-data``) and ``--seed``
    (default 123), creates one example per HARD_NEGATIVES entry with a
    generated state, shuffles them, cuts them 70/15/15 into train/val/test,
    and APPENDS each split to ``<output>/<split>.jsonl``. A per-tool count
    summary is printed at the end.
    """
    from collections import Counter

    cli = argparse.ArgumentParser(description="Generate hard negative training examples")
    cli.add_argument("--output", default=".jfl/v2-data", help="Output directory")
    cli.add_argument("--seed", type=int, default=123, help="Random seed")
    args = cli.parse_args()

    # A single seeded generator drives both state generation and the shuffle.
    rng = random.Random(args.seed)

    examples = [
        {
            "current_state": generate_state(agent, composite, rng),
            "goal": goal,
            "correct_tool": tool_name,
            "source": "hard_negative",
        }
        for tool_name, entries in HARD_NEGATIVES.items()
        for goal, agent, composite in entries
    ]
    rng.shuffle(examples)

    n = len(examples)
    train_end, val_end = int(n * 0.7), int(n * 0.85)
    splits = {
        "train": examples[:train_end],
        "val": examples[train_end:val_end],
        "test": examples[val_end:],
    }

    os.makedirs(args.output, exist_ok=True)

    for split_name, split_data in splits.items():
        path = os.path.join(args.output, f"{split_name}.jsonl")
        # Append mode: existing split files are extended, not overwritten.
        with open(path, "a") as f:
            f.writelines(json.dumps(ex) + "\n" for ex in split_data)
        print(f" {split_name}: +{len(split_data)} hard negatives → {path}")

    tool_counts = Counter(ex["correct_tool"] for ex in examples)
    print(f"\nGenerated {n} hard negative examples:")
    for tool, count in tool_counts.most_common():
        print(f" {tool:25s} {count:4d}")


if __name__ == "__main__":
    main()
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
"""
|
|
2
2
|
v2 Policy Head Inference — CLI script for action selection.
|
|
3
3
|
|
|
4
|
+
Uses precomputed embedding cache for fast inference (<500ms).
|
|
5
|
+
Falls back to Stratus API for cache misses only.
|
|
6
|
+
|
|
4
7
|
Usage:
|
|
5
|
-
python infer.py --checkpoint
|
|
8
|
+
python infer.py --checkpoint path/to/best_policy_head.pt --state "..." --goal "..." --top-k 3 --json
|
|
9
|
+
python infer.py --checkpoint ... --batch (read JSONL from stdin)
|
|
6
10
|
|
|
7
|
-
|
|
8
|
-
|
|
11
|
+
Cache:
|
|
12
|
+
Looks for embeddings_cache.npz + text_to_idx.json next to checkpoint or in --cache-dir.
|
|
13
|
+
Cache hit: <50ms total. Cache miss: ~5s per miss (Stratus API call).
|
|
9
14
|
"""
|
|
10
15
|
|
|
11
16
|
import json
|
|
12
17
|
import os
|
|
13
18
|
import sys
|
|
14
19
|
import argparse
|
|
20
|
+
import time
|
|
15
21
|
|
|
16
22
|
import torch
|
|
17
23
|
import numpy as np
|
|
@@ -19,6 +25,80 @@ import numpy as np
|
|
|
19
25
|
from model import PolicyHead
|
|
20
26
|
|
|
21
27
|
|
|
28
|
+
# ============================================================================
|
|
29
|
+
# Embedding Cache
|
|
30
|
+
# ============================================================================
|
|
31
|
+
|
|
32
|
+
class EmbeddingCache:
|
|
33
|
+
"""Fast embedding lookup from precomputed cache, with Stratus fallback."""
|
|
34
|
+
|
|
35
|
+
def __init__(self, cache_dir: str = None, api_url: str = None, api_key: str = None):
|
|
36
|
+
self.embeddings = None
|
|
37
|
+
self.text_to_idx = {}
|
|
38
|
+
self.api_url = api_url or os.environ.get("STRATUS_API_URL", "https://api.stratus.run")
|
|
39
|
+
self.api_key = api_key or os.environ.get("STRATUS_API_KEY", "")
|
|
40
|
+
self.hits = 0
|
|
41
|
+
self.misses = 0
|
|
42
|
+
|
|
43
|
+
if cache_dir:
|
|
44
|
+
self._load_cache(cache_dir)
|
|
45
|
+
|
|
46
|
+
def _load_cache(self, cache_dir: str):
|
|
47
|
+
npz_path = os.path.join(cache_dir, "embeddings_cache.npz")
|
|
48
|
+
idx_path = os.path.join(cache_dir, "text_to_idx.json")
|
|
49
|
+
|
|
50
|
+
if os.path.exists(npz_path) and os.path.exists(idx_path):
|
|
51
|
+
data = np.load(npz_path)
|
|
52
|
+
self.embeddings = data["embeddings"]
|
|
53
|
+
self.text_to_idx = json.load(open(idx_path))
|
|
54
|
+
print(f"Loaded embedding cache: {len(self.text_to_idx)} texts, {self.embeddings.shape[1]}-dim", file=sys.stderr)
|
|
55
|
+
else:
|
|
56
|
+
print(f"No embedding cache at {cache_dir}", file=sys.stderr)
|
|
57
|
+
|
|
58
|
+
def get(self, text: str) -> list[float]:
|
|
59
|
+
"""Get embedding for text. Cache hit = instant, miss = API call."""
|
|
60
|
+
# Try cache first
|
|
61
|
+
if self.text_to_idx and text in self.text_to_idx:
|
|
62
|
+
idx = self.text_to_idx[text]
|
|
63
|
+
self.hits += 1
|
|
64
|
+
return self.embeddings[idx].tolist()
|
|
65
|
+
|
|
66
|
+
# Cache miss — try API
|
|
67
|
+
self.misses += 1
|
|
68
|
+
if not self.api_key:
|
|
69
|
+
# No API key and no cache hit — return zero vector
|
|
70
|
+
dim = self.embeddings.shape[1] if self.embeddings is not None else 768
|
|
71
|
+
print(f"WARN: Cache miss + no API key for text: {text[:60]}...", file=sys.stderr)
|
|
72
|
+
return [0.0] * dim
|
|
73
|
+
|
|
74
|
+
return self._api_embed(text)
|
|
75
|
+
|
|
76
|
+
def _api_embed(self, text: str) -> list[float]:
|
|
77
|
+
import requests
|
|
78
|
+
response = requests.post(
|
|
79
|
+
f"{self.api_url}/v1/embeddings",
|
|
80
|
+
headers={
|
|
81
|
+
"Authorization": f"Bearer {self.api_key}",
|
|
82
|
+
"Content-Type": "application/json",
|
|
83
|
+
},
|
|
84
|
+
json={
|
|
85
|
+
"model": "stratus-x1ac-base",
|
|
86
|
+
"input": text,
|
|
87
|
+
},
|
|
88
|
+
timeout=15,
|
|
89
|
+
)
|
|
90
|
+
response.raise_for_status()
|
|
91
|
+
data = response.json()
|
|
92
|
+
return data["data"][0]["embedding"]
|
|
93
|
+
|
|
94
|
+
def stats(self) -> dict:
|
|
95
|
+
return {"hits": self.hits, "misses": self.misses, "cache_size": len(self.text_to_idx)}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ============================================================================
|
|
99
|
+
# Model Loading
|
|
100
|
+
# ============================================================================
|
|
101
|
+
|
|
22
102
|
def load_model(checkpoint_path: str, device: str = "cpu"):
|
|
23
103
|
ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
|
|
24
104
|
config = ckpt["config"]
|
|
@@ -40,27 +120,39 @@ def load_model(checkpoint_path: str, device: str = "cpu"):
|
|
|
40
120
|
return model, tool_to_index, index_to_tool, config
|
|
41
121
|
|
|
42
122
|
|
|
43
|
-
def
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
123
|
+
def find_cache_dir(checkpoint_path: str) -> str | None:
|
|
124
|
+
"""Find embedding cache directory. Checks multiple locations."""
|
|
125
|
+
import pathlib
|
|
126
|
+
ckpt_dir = pathlib.Path(checkpoint_path).parent
|
|
127
|
+
|
|
128
|
+
# Check next to checkpoint: .jfl/checkpoints/ → .jfl/v2-data/
|
|
129
|
+
candidates = [
|
|
130
|
+
ckpt_dir.parent / "v2-data", # .jfl/v2-data/
|
|
131
|
+
ckpt_dir / "v2-data", # .jfl/checkpoints/v2-data/
|
|
132
|
+
pathlib.Path.cwd() / ".jfl" / "v2-data", # cwd/.jfl/v2-data/
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
# Also check EMBEDDING_CACHE_DIR env var
|
|
136
|
+
env_dir = os.environ.get("EMBEDDING_CACHE_DIR")
|
|
137
|
+
if env_dir:
|
|
138
|
+
candidates.insert(0, pathlib.Path(env_dir))
|
|
139
|
+
|
|
140
|
+
for candidate in candidates:
|
|
141
|
+
npz = candidate / "embeddings_cache.npz"
|
|
142
|
+
idx = candidate / "text_to_idx.json"
|
|
143
|
+
if npz.exists() and idx.exists():
|
|
144
|
+
return str(candidate)
|
|
145
|
+
|
|
146
|
+
return None
|
|
147
|
+
|
|
61
148
|
|
|
149
|
+
# ============================================================================
|
|
150
|
+
# Inference
|
|
151
|
+
# ============================================================================
|
|
62
152
|
|
|
63
153
|
def infer(args):
|
|
154
|
+
t0 = time.time()
|
|
155
|
+
|
|
64
156
|
if torch.cuda.is_available():
|
|
65
157
|
device = "cuda"
|
|
66
158
|
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
@@ -69,16 +161,21 @@ def infer(args):
|
|
|
69
161
|
device = "cpu"
|
|
70
162
|
|
|
71
163
|
model, tool_to_index, index_to_tool, config = load_model(args.checkpoint, device)
|
|
164
|
+
t_model = time.time()
|
|
165
|
+
|
|
166
|
+
# Load embedding cache
|
|
167
|
+
cache_dir = args.cache_dir or find_cache_dir(args.checkpoint)
|
|
168
|
+
cache = EmbeddingCache(
|
|
169
|
+
cache_dir=cache_dir,
|
|
170
|
+
api_url=os.environ.get("STRATUS_API_URL", "https://api.stratus.run"),
|
|
171
|
+
api_key=os.environ.get("STRATUS_API_KEY", ""),
|
|
172
|
+
)
|
|
173
|
+
t_cache = time.time()
|
|
72
174
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
print("STRATUS_API_KEY not set", file=sys.stderr)
|
|
78
|
-
sys.exit(1)
|
|
79
|
-
|
|
80
|
-
state_emb = get_embedding(args.state, api_url, api_key)
|
|
81
|
-
goal_emb = get_embedding(args.goal, api_url, api_key)
|
|
175
|
+
# Get embeddings (cache hit = instant, miss = API call)
|
|
176
|
+
state_emb = cache.get(args.state)
|
|
177
|
+
goal_emb = cache.get(args.goal)
|
|
178
|
+
t_embed = time.time()
|
|
82
179
|
|
|
83
180
|
state_tensor = torch.tensor([state_emb], dtype=torch.float32).to(device)
|
|
84
181
|
goal_tensor = torch.tensor([goal_emb], dtype=torch.float32).to(device)
|
|
@@ -87,6 +184,7 @@ def infer(args):
|
|
|
87
184
|
|
|
88
185
|
top_indices = result["top_k_indices"][0].cpu().tolist()
|
|
89
186
|
top_probs = result["top_k_probs"][0].cpu().tolist()
|
|
187
|
+
t_infer = time.time()
|
|
90
188
|
|
|
91
189
|
predictions = []
|
|
92
190
|
for idx, prob in zip(top_indices, top_probs):
|
|
@@ -101,10 +199,16 @@ def infer(args):
|
|
|
101
199
|
"confidence": predictions[0]["confidence"],
|
|
102
200
|
"alternatives": predictions[1:],
|
|
103
201
|
}
|
|
202
|
+
# Include timing in stderr for debugging
|
|
203
|
+
stats = cache.stats()
|
|
204
|
+
timing = f"model={t_model-t0:.1f}s cache={t_cache-t_model:.1f}s embed={t_embed-t_cache:.1f}s infer={t_infer-t_embed:.1f}s total={t_infer-t0:.1f}s hits={stats['hits']} misses={stats['misses']}"
|
|
205
|
+
print(f"timing: {timing}", file=sys.stderr)
|
|
104
206
|
print(json.dumps(output))
|
|
105
207
|
else:
|
|
106
|
-
|
|
107
|
-
|
|
208
|
+
t_total = t_infer - t0
|
|
209
|
+
stats = cache.stats()
|
|
210
|
+
print(f"\nv2 Policy Head Prediction ({t_total:.2f}s, {stats['hits']} cache hits, {stats['misses']} misses)")
|
|
211
|
+
print(f"{'─' * 50}")
|
|
108
212
|
print(f"State: {args.state[:80]}...")
|
|
109
213
|
print(f"Goal: {args.goal[:80]}...")
|
|
110
214
|
print(f"\nTop {args.top_k} actions:")
|
|
@@ -125,8 +229,13 @@ def batch_infer(args):
|
|
|
125
229
|
|
|
126
230
|
model, tool_to_index, index_to_tool, config = load_model(args.checkpoint, device)
|
|
127
231
|
|
|
128
|
-
|
|
129
|
-
|
|
232
|
+
# Load embedding cache
|
|
233
|
+
cache_dir = args.cache_dir or find_cache_dir(args.checkpoint)
|
|
234
|
+
cache = EmbeddingCache(
|
|
235
|
+
cache_dir=cache_dir,
|
|
236
|
+
api_url=os.environ.get("STRATUS_API_URL", "https://api.stratus.run"),
|
|
237
|
+
api_key=os.environ.get("STRATUS_API_KEY", ""),
|
|
238
|
+
)
|
|
130
239
|
|
|
131
240
|
for line in sys.stdin:
|
|
132
241
|
line = line.strip()
|
|
@@ -135,8 +244,8 @@ def batch_infer(args):
|
|
|
135
244
|
|
|
136
245
|
try:
|
|
137
246
|
req = json.loads(line)
|
|
138
|
-
state_emb =
|
|
139
|
-
goal_emb =
|
|
247
|
+
state_emb = cache.get(req["state"])
|
|
248
|
+
goal_emb = cache.get(req["goal"])
|
|
140
249
|
|
|
141
250
|
state_tensor = torch.tensor([state_emb], dtype=torch.float32).to(device)
|
|
142
251
|
goal_tensor = torch.tensor([goal_emb], dtype=torch.float32).to(device)
|
|
@@ -164,6 +273,9 @@ def batch_infer(args):
|
|
|
164
273
|
print(json.dumps({"error": str(e)}))
|
|
165
274
|
sys.stdout.flush()
|
|
166
275
|
|
|
276
|
+
stats = cache.stats()
|
|
277
|
+
print(f"Batch complete: {stats['hits']} hits, {stats['misses']} misses", file=sys.stderr)
|
|
278
|
+
|
|
167
279
|
|
|
168
280
|
def main():
|
|
169
281
|
parser = argparse.ArgumentParser(description="v2 policy head inference")
|
|
@@ -173,6 +285,7 @@ def main():
|
|
|
173
285
|
parser.add_argument("--top-k", type=int, default=3, help="Number of top actions")
|
|
174
286
|
parser.add_argument("--json", action="store_true", help="JSON output for TypeScript bridge")
|
|
175
287
|
parser.add_argument("--batch", action="store_true", help="Batch mode: read JSONL from stdin")
|
|
288
|
+
parser.add_argument("--cache-dir", default=None, help="Directory with embeddings_cache.npz + text_to_idx.json")
|
|
176
289
|
args = parser.parse_args()
|
|
177
290
|
|
|
178
291
|
if args.batch:
|