jfl 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/dist/commands/doctor.d.ts +1 -0
  2. package/dist/commands/doctor.d.ts.map +1 -1
  3. package/dist/commands/doctor.js +30 -1
  4. package/dist/commands/doctor.js.map +1 -1
  5. package/dist/commands/ide.d.ts +2 -1
  6. package/dist/commands/ide.d.ts.map +1 -1
  7. package/dist/commands/ide.js +60 -1
  8. package/dist/commands/ide.js.map +1 -1
  9. package/dist/commands/init-from-service.d.ts +15 -0
  10. package/dist/commands/init-from-service.d.ts.map +1 -0
  11. package/dist/commands/init-from-service.js +541 -0
  12. package/dist/commands/init-from-service.js.map +1 -0
  13. package/dist/commands/init.d.ts +1 -0
  14. package/dist/commands/init.d.ts.map +1 -1
  15. package/dist/commands/init.js +32 -1
  16. package/dist/commands/init.js.map +1 -1
  17. package/dist/commands/kanban.d.ts.map +1 -1
  18. package/dist/commands/kanban.js +13 -4
  19. package/dist/commands/kanban.js.map +1 -1
  20. package/dist/commands/linear.d.ts +41 -0
  21. package/dist/commands/linear.d.ts.map +1 -0
  22. package/dist/commands/linear.js +715 -0
  23. package/dist/commands/linear.js.map +1 -0
  24. package/dist/commands/peter.d.ts.map +1 -1
  25. package/dist/commands/peter.js +232 -25
  26. package/dist/commands/peter.js.map +1 -1
  27. package/dist/commands/services.d.ts.map +1 -1
  28. package/dist/commands/services.js +146 -0
  29. package/dist/commands/services.js.map +1 -1
  30. package/dist/commands/setup.d.ts.map +1 -1
  31. package/dist/commands/setup.js +173 -13
  32. package/dist/commands/setup.js.map +1 -1
  33. package/dist/commands/telemetry-monitor.d.ts +11 -0
  34. package/dist/commands/telemetry-monitor.d.ts.map +1 -0
  35. package/dist/commands/telemetry-monitor.js +224 -0
  36. package/dist/commands/telemetry-monitor.js.map +1 -0
  37. package/dist/commands/telemetry-test.d.ts +11 -0
  38. package/dist/commands/telemetry-test.d.ts.map +1 -0
  39. package/dist/commands/telemetry-test.js +67 -0
  40. package/dist/commands/telemetry-test.js.map +1 -0
  41. package/dist/commands/tenet-agents.d.ts +13 -0
  42. package/dist/commands/tenet-agents.d.ts.map +1 -0
  43. package/dist/commands/tenet-agents.js +191 -0
  44. package/dist/commands/tenet-agents.js.map +1 -0
  45. package/dist/commands/tenet-setup.d.ts +19 -0
  46. package/dist/commands/tenet-setup.d.ts.map +1 -0
  47. package/dist/commands/tenet-setup.js +131 -0
  48. package/dist/commands/tenet-setup.js.map +1 -0
  49. package/dist/commands/train.d.ts +18 -0
  50. package/dist/commands/train.d.ts.map +1 -1
  51. package/dist/commands/train.js +182 -0
  52. package/dist/commands/train.js.map +1 -1
  53. package/dist/commands/whoami.d.ts +2 -0
  54. package/dist/commands/whoami.d.ts.map +1 -0
  55. package/dist/commands/whoami.js +24 -0
  56. package/dist/commands/whoami.js.map +1 -0
  57. package/dist/index.js +159 -10
  58. package/dist/index.js.map +1 -1
  59. package/dist/lib/advanced-setup.d.ts +78 -0
  60. package/dist/lib/advanced-setup.d.ts.map +1 -0
  61. package/dist/lib/advanced-setup.js +433 -0
  62. package/dist/lib/advanced-setup.js.map +1 -0
  63. package/dist/lib/agent-config.d.ts +33 -0
  64. package/dist/lib/agent-config.d.ts.map +1 -1
  65. package/dist/lib/agent-config.js +26 -0
  66. package/dist/lib/agent-config.js.map +1 -1
  67. package/dist/lib/counterfactual-training-bridge.d.ts +114 -0
  68. package/dist/lib/counterfactual-training-bridge.d.ts.map +1 -0
  69. package/dist/lib/counterfactual-training-bridge.js +322 -0
  70. package/dist/lib/counterfactual-training-bridge.js.map +1 -0
  71. package/dist/lib/discovery-agent.d.ts +48 -0
  72. package/dist/lib/discovery-agent.d.ts.map +1 -0
  73. package/dist/lib/discovery-agent.js +111 -0
  74. package/dist/lib/discovery-agent.js.map +1 -0
  75. package/dist/lib/flow-engine.d.ts.map +1 -1
  76. package/dist/lib/flow-engine.js +46 -8
  77. package/dist/lib/flow-engine.js.map +1 -1
  78. package/dist/lib/gtm-generator.d.ts +29 -0
  79. package/dist/lib/gtm-generator.d.ts.map +1 -0
  80. package/dist/lib/gtm-generator.js +252 -0
  81. package/dist/lib/gtm-generator.js.map +1 -0
  82. package/dist/lib/hub-health.d.ts +40 -0
  83. package/dist/lib/hub-health.d.ts.map +1 -0
  84. package/dist/lib/hub-health.js +89 -0
  85. package/dist/lib/hub-health.js.map +1 -0
  86. package/dist/lib/invariant-monitor.d.ts +6 -2
  87. package/dist/lib/invariant-monitor.d.ts.map +1 -1
  88. package/dist/lib/invariant-monitor.js +89 -2
  89. package/dist/lib/invariant-monitor.js.map +1 -1
  90. package/dist/lib/journal-analyzer.d.ts +71 -0
  91. package/dist/lib/journal-analyzer.d.ts.map +1 -0
  92. package/dist/lib/journal-analyzer.js +306 -0
  93. package/dist/lib/journal-analyzer.js.map +1 -0
  94. package/dist/lib/linear-client.d.ts +73 -0
  95. package/dist/lib/linear-client.d.ts.map +1 -0
  96. package/dist/lib/linear-client.js +112 -0
  97. package/dist/lib/linear-client.js.map +1 -0
  98. package/dist/lib/linear-id-map.d.ts +20 -0
  99. package/dist/lib/linear-id-map.d.ts.map +1 -0
  100. package/dist/lib/linear-id-map.js +57 -0
  101. package/dist/lib/linear-id-map.js.map +1 -0
  102. package/dist/lib/linear-kanban.d.ts +66 -0
  103. package/dist/lib/linear-kanban.d.ts.map +1 -0
  104. package/dist/lib/linear-kanban.js +175 -0
  105. package/dist/lib/linear-kanban.js.map +1 -0
  106. package/dist/lib/onboarding.d.ts +40 -0
  107. package/dist/lib/onboarding.d.ts.map +1 -0
  108. package/dist/lib/onboarding.js +213 -0
  109. package/dist/lib/onboarding.js.map +1 -0
  110. package/dist/lib/physical-world-model.d.ts +50 -0
  111. package/dist/lib/physical-world-model.d.ts.map +1 -0
  112. package/dist/lib/physical-world-model.js +251 -0
  113. package/dist/lib/physical-world-model.js.map +1 -0
  114. package/dist/lib/planning-loop.d.ts +157 -0
  115. package/dist/lib/planning-loop.d.ts.map +1 -0
  116. package/dist/lib/planning-loop.js +537 -0
  117. package/dist/lib/planning-loop.js.map +1 -0
  118. package/dist/lib/policy-head.d.ts +13 -0
  119. package/dist/lib/policy-head.d.ts.map +1 -1
  120. package/dist/lib/policy-head.js +168 -2
  121. package/dist/lib/policy-head.js.map +1 -1
  122. package/dist/lib/resource-optimizer-middleware.d.ts +39 -0
  123. package/dist/lib/resource-optimizer-middleware.d.ts.map +1 -0
  124. package/dist/lib/resource-optimizer-middleware.js +222 -0
  125. package/dist/lib/resource-optimizer-middleware.js.map +1 -0
  126. package/dist/lib/resource-optimizer.d.ts +71 -0
  127. package/dist/lib/resource-optimizer.d.ts.map +1 -0
  128. package/dist/lib/resource-optimizer.js +228 -0
  129. package/dist/lib/resource-optimizer.js.map +1 -0
  130. package/dist/lib/rl-manager.d.ts +74 -0
  131. package/dist/lib/rl-manager.d.ts.map +1 -0
  132. package/dist/lib/rl-manager.js +244 -0
  133. package/dist/lib/rl-manager.js.map +1 -0
  134. package/dist/lib/service-analyzer.d.ts +76 -0
  135. package/dist/lib/service-analyzer.d.ts.map +1 -0
  136. package/dist/lib/service-analyzer.js +704 -0
  137. package/dist/lib/service-analyzer.js.map +1 -0
  138. package/dist/lib/service-gtm.js +2 -2
  139. package/dist/lib/service-gtm.js.map +1 -1
  140. package/dist/lib/service-questionnaire.d.ts +11 -0
  141. package/dist/lib/service-questionnaire.d.ts.map +1 -0
  142. package/dist/lib/service-questionnaire.js +89 -0
  143. package/dist/lib/service-questionnaire.js.map +1 -0
  144. package/dist/lib/setup/agent-generator.d.ts +2 -0
  145. package/dist/lib/setup/agent-generator.d.ts.map +1 -1
  146. package/dist/lib/setup/agent-generator.js +128 -4
  147. package/dist/lib/setup/agent-generator.js.map +1 -1
  148. package/dist/lib/setup/flow-generator.d.ts +10 -0
  149. package/dist/lib/setup/flow-generator.d.ts.map +1 -0
  150. package/dist/lib/setup/flow-generator.js +113 -0
  151. package/dist/lib/setup/flow-generator.js.map +1 -0
  152. package/dist/lib/setup/invariant-bridge.d.ts +91 -0
  153. package/dist/lib/setup/invariant-bridge.d.ts.map +1 -0
  154. package/dist/lib/setup/invariant-bridge.js +384 -0
  155. package/dist/lib/setup/invariant-bridge.js.map +1 -0
  156. package/dist/lib/setup/spec-generator.d.ts +41 -5
  157. package/dist/lib/setup/spec-generator.d.ts.map +1 -1
  158. package/dist/lib/setup/spec-generator.js +503 -29
  159. package/dist/lib/setup/spec-generator.js.map +1 -1
  160. package/dist/lib/stratus-client.js +1 -1
  161. package/dist/lib/stratus-client.js.map +1 -1
  162. package/dist/lib/surface-agent.d.ts +78 -0
  163. package/dist/lib/surface-agent.d.ts.map +1 -0
  164. package/dist/lib/surface-agent.js +105 -0
  165. package/dist/lib/surface-agent.js.map +1 -0
  166. package/dist/lib/surface-coordination-example.d.ts +30 -0
  167. package/dist/lib/surface-coordination-example.d.ts.map +1 -0
  168. package/dist/lib/surface-coordination-example.js +164 -0
  169. package/dist/lib/surface-coordination-example.js.map +1 -0
  170. package/dist/lib/telemetry/physical-world-collector.d.ts +15 -0
  171. package/dist/lib/telemetry/physical-world-collector.d.ts.map +1 -0
  172. package/dist/lib/telemetry/physical-world-collector.js +177 -0
  173. package/dist/lib/telemetry/physical-world-collector.js.map +1 -0
  174. package/dist/lib/telemetry/training-bridge.d.ts +51 -0
  175. package/dist/lib/telemetry/training-bridge.d.ts.map +1 -0
  176. package/dist/lib/telemetry/training-bridge.js +185 -0
  177. package/dist/lib/telemetry/training-bridge.js.map +1 -0
  178. package/dist/lib/telemetry.d.ts +2 -1
  179. package/dist/lib/telemetry.d.ts.map +1 -1
  180. package/dist/lib/telemetry.js +23 -2
  181. package/dist/lib/telemetry.js.map +1 -1
  182. package/dist/lib/tenet-board-agent.d.ts +52 -0
  183. package/dist/lib/tenet-board-agent.d.ts.map +1 -0
  184. package/dist/lib/tenet-board-agent.js +226 -0
  185. package/dist/lib/tenet-board-agent.js.map +1 -0
  186. package/dist/lib/tenet-ide-agent.d.ts +40 -0
  187. package/dist/lib/tenet-ide-agent.d.ts.map +1 -0
  188. package/dist/lib/tenet-ide-agent.js +199 -0
  189. package/dist/lib/tenet-ide-agent.js.map +1 -0
  190. package/dist/lib/workspace/data-pipeline.d.ts.map +1 -1
  191. package/dist/lib/workspace/data-pipeline.js +27 -5
  192. package/dist/lib/workspace/data-pipeline.js.map +1 -1
  193. package/dist/lib/workspace/sidebar-runner.d.ts +13 -0
  194. package/dist/lib/workspace/sidebar-runner.d.ts.map +1 -0
  195. package/dist/lib/workspace/sidebar-runner.js +419 -0
  196. package/dist/lib/workspace/sidebar-runner.js.map +1 -0
  197. package/dist/lib/workspace/surface-registry.d.ts.map +1 -1
  198. package/dist/lib/workspace/surface-registry.js +4 -1
  199. package/dist/lib/workspace/surface-registry.js.map +1 -1
  200. package/dist/lib/workspace/surfaces/agent-overview.d.ts +3 -3
  201. package/dist/lib/workspace/surfaces/agent-overview.d.ts.map +1 -1
  202. package/dist/lib/workspace/surfaces/agent-overview.js +3 -3
  203. package/dist/lib/workspace/surfaces/agent-overview.js.map +1 -1
  204. package/dist/lib/workspace/surfaces/index.d.ts +3 -0
  205. package/dist/lib/workspace/surfaces/index.d.ts.map +1 -1
  206. package/dist/lib/workspace/surfaces/index.js +3 -0
  207. package/dist/lib/workspace/surfaces/index.js.map +1 -1
  208. package/dist/lib/workspace/surfaces/kanban.d.ts +15 -0
  209. package/dist/lib/workspace/surfaces/kanban.d.ts.map +1 -0
  210. package/dist/lib/workspace/surfaces/kanban.js +43 -0
  211. package/dist/lib/workspace/surfaces/kanban.js.map +1 -0
  212. package/dist/lib/workspace/surfaces/physical-world.d.ts +15 -0
  213. package/dist/lib/workspace/surfaces/physical-world.d.ts.map +1 -0
  214. package/dist/lib/workspace/surfaces/physical-world.js +37 -0
  215. package/dist/lib/workspace/surfaces/physical-world.js.map +1 -0
  216. package/dist/lib/workspace/surfaces/sidebar.d.ts +22 -0
  217. package/dist/lib/workspace/surfaces/sidebar.d.ts.map +1 -0
  218. package/dist/lib/workspace/surfaces/sidebar.js +90 -0
  219. package/dist/lib/workspace/surfaces/sidebar.js.map +1 -0
  220. package/dist/types/flows.d.ts +2 -1
  221. package/dist/types/flows.d.ts.map +1 -1
  222. package/dist/types/physical-world-model.d.ts +65 -0
  223. package/dist/types/physical-world-model.d.ts.map +1 -0
  224. package/dist/types/physical-world-model.js +43 -0
  225. package/dist/types/physical-world-model.js.map +1 -0
  226. package/dist/types/telemetry.d.ts +37 -0
  227. package/dist/types/telemetry.d.ts.map +1 -1
  228. package/dist/types/world-model.d.ts.map +1 -1
  229. package/dist/types/world-model.js +14 -7
  230. package/dist/types/world-model.js.map +1 -1
  231. package/dist/utils/context-hub-port.d.ts.map +1 -1
  232. package/dist/utils/context-hub-port.js +6 -1
  233. package/dist/utils/context-hub-port.js.map +1 -1
  234. package/package.json +3 -2
  235. package/packages/pi/extensions/index.ts +34 -6
  236. package/scripts/telemetry-dashboard.sh +44 -0
  237. package/scripts/test-planning-loop-e2e.ts +181 -0
  238. package/scripts/test-server-inference.ts +49 -0
  239. package/scripts/test-state-sensitivity.ts +32 -0
  240. package/scripts/train/v2/benchmark.py +661 -0
  241. package/scripts/train/v2/generate_balanced.py +439 -0
  242. package/scripts/train/v2/generate_hard_negatives.py +219 -0
  243. package/scripts/train/v2/infer.py +149 -36
  244. package/scripts/train/v2/infer_server.py +224 -0
  245. package/scripts/train/v2/online_train.py +576 -0
  246. package/scripts/train/v2/precompute.py +24 -6
  247. package/template/CLAUDE.md +74 -132
@@ -0,0 +1,219 @@
1
+ """
2
+ Generate hard negative training examples to address benchmark gaps.
3
+
4
+ Targets specific confusion patterns:
5
+ 1. add_feature: model confuses with optimize_performance, add_tests, update_docs
6
+ 2. fix_bug on infrastructure: model confuses with dependency_update, data_pipeline
7
+ 3. security_hardening overfit: model incorrectly predicts security for refactor/deps/docs
8
+ 4. refactor_code vs data_pipeline confusion
9
+
10
+ Strategy: create examples that LOOK like the wrong class but ARE the right class.
11
+ """
12
+
13
+ import json
14
+ import os
15
+ import random
16
+ import argparse
17
+
18
+
19
# Hand-written hard-negative examples, keyed by the tool name that main()
# records as "correct_tool" for every entry in that list.
#
# Each entry is a 3-tuple that main() unpacks as (goal, agent, composite):
#   goal      - the task description stored under "goal" in the example
#   agent     - agent name passed to generate_state() for the "Agent:" line
#   composite - score passed to generate_state() for the "Composite:" line;
#               generate_state() also uses it (> 0.5 or not) to choose the
#               range of the synthetic test pass rate
HARD_NEGATIVES = {
    # ── add_feature: things that SOUND like optimization/testing/docs but are features ──
    "add_feature": [
        # These sound like optimization but are actually new features
        ("Add caching layer for embedding lookups", "feature-builder", 0.80),
        ("Implement connection pooling for Stratus API client", "feature-builder", 0.78),
        ("Build batch inference mode for PolicyHead", "feature-builder", 0.82),
        ("Create worker thread pool for parallel eval execution", "feature-builder", 0.75),
        ("Add streaming response support to hub WebSocket API", "feature-builder", 0.80),
        ("Implement lazy loading for heavy module imports", "feature-builder", 0.77),
        # These sound like tests but are actually features
        ("Build benchmark scenario runner with graded reports", "feature-builder", 0.85),
        ("Create synthetic data generator for training pipeline", "feature-builder", 0.82),
        ("Implement validation harness for counterfactual predictions", "feature-builder", 0.79),
        ("Add smoke test CLI command for quick system health checks", "feature-builder", 0.81),
        # These sound like docs but are actually features
        ("Build interactive API documentation server", "feature-builder", 0.76),
        ("Create changelog generator from git history", "feature-builder", 0.78),
        ("Implement decision record template system", "feature-builder", 0.80),
        # These are clearly features
        ("Add planning loop that connects PolicyHead to DynamicsModel for rollout simulation", "feature-builder", 0.88),
        ("Implement experience replay buffer for online learning with reservoir sampling", "feature-builder", 0.85),
        ("Build counterfactual training bridge that transforms CF scenarios into v2 tuples", "feature-builder", 0.87),
        ("Create multi-step rollout planner with beam search and pruning", "feature-builder", 0.83),
        ("Add P2P agent communication via Subway mesh network", "feature-builder", 0.86),
        ("Implement cost monitoring dashboard for Stratus API usage tracking", "feature-builder", 0.82),
        ("Build agent fleet management with VM spawning and tuple collection", "feature-builder", 0.84),
        ("Create evaluation pyramid with L1-L4 metrics tracking", "feature-builder", 0.80),
        ("Add CRM integration via Google Sheets CLI", "feature-builder", 0.79),
        ("Implement Linear issue sync with bidirectional updates", "feature-builder", 0.85),
        ("Build self-driving loop with cron-triggered autoresearch", "feature-builder", 0.88),
        ("Create agent orchestrator with model routing per role", "feature-builder", 0.87),
        ("Add journal system with per-session JSONL files and memory indexing", "feature-builder", 0.83),
        ("Implement TLA+ invariant monitor for runtime safety checks", "feature-builder", 0.81),
        ("Build state capture module that snapshots system state for dynamics learning", "feature-builder", 0.82),
        ("Create world model store with JSONL storage for transitions and predictions", "feature-builder", 0.80),
    ],

    # ── fix_bug on infrastructure crashes: NOT dependency_update or data_pipeline ──
    "fix_bug": [
        # Hub crashes — clearly bugs, not dependency issues
        ("Hub process crashes with SIGSEGV when receiving malformed WebSocket frame", "hub-sentinel", 0.35),
        ("Hub OOM killed after 6 hours — memory leak in event accumulator", "hub-sentinel", 0.30),
        ("100% agent stranding — hub connection pool exhausted, no new connections accepted", "hub-sentinel", 0.25),
        ("Hub crashes on startup when config.json has trailing comma", "hub-sentinel", 0.40),
        ("Agent session lost when hub restarts — state not persisted to disk", "hub-sentinel", 0.35),
        ("Hub WebSocket server fails to bind port — EADDRINUSE not handled", "hub-sentinel", 0.38),
        # Infrastructure bugs — not data pipeline issues
        ("Worktree allocation race condition — two agents get same worktree", "system-health", 0.42),
        ("File lock not released after agent crash — blocks subsequent agents", "system-health", 0.40),
        ("Git merge fails silently, produces empty commit with no changes", "system-health", 0.45),
        ("Session branch not cleaned up after merge — stale branches accumulate", "system-health", 0.48),
        ("Eval runner hangs when test script has infinite loop — no timeout enforcement", "error-fixer", 0.35),
        ("CLI crashes with stack overflow when .jfl directory contains circular symlinks", "error-fixer", 0.42),
        # Runtime errors — not config or dependency issues
        ("TypeError: Cannot read properties of undefined reading 'composite'", "error-fixer", 0.50),
        ("Unhandled promise rejection crashes Node.js process — no global handler", "error-fixer", 0.45),
        ("ENOENT error when accessing worktree that was garbage collected", "error-fixer", 0.48),
        ("JSON.parse fails on training buffer — corrupted entry at byte offset 34521", "error-fixer", 0.38),
        ("PolicyHead inference subprocess exits with code 139 — segfault in PyTorch", "error-fixer", 0.32),
        ("Race condition in concurrent file writes to training-buffer.jsonl", "error-fixer", 0.40),
        ("Build succeeds but tests fail because dist/ has stale compiled files", "test-coverage", 0.55),
        ("Flaky test passes 9/10 runs — timing-dependent assertion", "test-coverage", 0.60),
        ("CORS error blocks dashboard API calls — missing header in response", "error-fixer", 0.50),
        ("Memory allocation failure when loading 50MB checkpoint on low-RAM VM", "error-fixer", 0.35),
        ("Agent gets stuck in infinite retry loop when hub is unreachable", "hub-sentinel", 0.30),
        ("Graceful shutdown handler doesn't wait for in-flight requests", "hub-sentinel", 0.38),
        ("Hot reload breaks when file change event fires before write completes", "error-fixer", 0.45),
        ("Exit code 0 returned on failure — downstream scripts think it succeeded", "error-fixer", 0.50),
        ("UTC/local timezone confusion causes journal entries with future timestamps", "error-fixer", 0.55),
        ("Package.json scripts reference removed file — npm run breaks", "error-fixer", 0.52),
        ("Checkpoint loading fails silently and uses random weights instead", "error-fixer", 0.35),
        ("Request body parser rejects valid JSON with nested arrays > 3 levels", "error-fixer", 0.48),
    ],

    # ── NOT security_hardening — these LOOK like security but are other tools ──
    "refactor_code": [
        # These mention "clean up" or "restructure" which model confuses with security
        ("Clean up error handling — 5 different try/catch patterns across codebase", "code-quality", 0.75),
        ("Restructure authentication flow — too many layers of indirection", "code-quality", 0.72),
        ("Simplify access control logic — nested conditionals are unreadable", "code-quality", 0.70),
        ("Extract validation helpers — same input checking code in 8 places", "code-quality", 0.78),
        ("Consolidate logging — mixing console.log, winston, and pino", "code-quality", 0.73),
        ("Reduce coupling between auth module and user service", "code-quality", 0.71),
        ("Simplify config parsing — 200 lines of manual env var handling", "code-quality", 0.76),
        ("Break up god class that handles routing, auth, and business logic", "code-quality", 0.74),
        ("Replace manual string concatenation for SQL with query builder", "code-quality", 0.72),
        ("Extract shared HTTP client setup from 6 different service files", "code-quality", 0.75),
    ],

    "dependency_update": [
        # These mention "security" or "CVE" but the action is updating deps, not hardening
        ("npm audit shows 5 moderate vulnerabilities — update affected packages", "dependency-updater", 0.78),
        ("CVE-2026-9999 in lodash — bump to latest patched version", "dependency-updater", 0.82),
        ("Security advisory for express — update from 4.18 to 4.21", "dependency-updater", 0.80),
        ("Dependabot PR waiting for review — axios security update", "dependency-updater", 0.75),
        ("GitHub security alert on transitive dependency — update parent package", "dependency-updater", 0.77),
    ],

    "update_docs": [
        # These mention "security" concepts but the action is writing docs
        ("Document API authentication flow for new team members", "docs-updater", 0.80),
        ("Write security best practices guide for the codebase", "docs-updater", 0.78),
        ("Document secrets management process — which env vars, where stored", "docs-updater", 0.76),
        ("Add access control section to onboarding documentation", "docs-updater", 0.82),
        ("Write incident response playbook for hub crashes", "docs-updater", 0.79),
    ],

    # ── data_pipeline: distinguish from dependency_update and optimize_performance ──
    "data_pipeline": [
        ("Training buffer entries not flowing to v2 transform — pipeline stalled", "data-engineer", 0.50),
        ("Embedding cache miss rate at 40% — precompute step skipping new texts", "data-engineer", 0.55),
        ("Counterfactual scenarios not appearing in training buffer — bridge broken", "data-engineer", 0.48),
        ("JSONL corruption in transitions file — entries missing closing brace", "data-engineer", 0.45),
        ("Eval scored events not triggering training tuple creation", "data-engineer", 0.52),
        ("Duplicate entries in training buffer after fleet tuple collection", "data-engineer", 0.50),
        ("Data split not stratified — test set has 0 examples of fix_bug class", "data-engineer", 0.55),
        ("Nightly pipeline step 2 fails — transform can't read new action types", "data-engineer", 0.48),
        ("Mining journals produces tuples with empty reward fields", "data-engineer", 0.52),
        ("Training data lineage lost — can't trace which journal entry produced which tuple", "data-engineer", 0.50),
    ],
}
141
+
142
+
143
def generate_state(agent: str, composite: float, rng: random.Random) -> str:
    """Build a synthetic six-line state description for one training example.

    Args:
        agent: Agent name written on the "Agent:" line.
        composite: Composite score written on the "Composite:" line. Values
            above 0.5 draw the test pass rate from [0.5, 1.0]; all others
            draw it from [0.3, 0.8].
        rng: Random source; every random value is drawn from it, so a seeded
            generator yields reproducible text.

    Returns:
        Newline-joined text with the lines Agent, Composite, Tests,
        Trajectory, Dimensions and Recent deltas, in that order.
    """
    total = rng.choice([15, 20, 25, 30, 35, 40, 45])
    if composite > 0.5:
        pass_rate = rng.uniform(0.5, 1.0)
    else:
        pass_rate = rng.uniform(0.3, 0.8)
    passing = int(total * pass_rate)
    trajectory = rng.randint(1, 10)

    available_dims = [
        "test_pass_rate", "build_health", "code_quality", "hub_health",
        "error_rate", "security_score", "observability", "pipeline_health",
        "doc_coverage", "maintainability", "test_coverage", "data_quality",
    ]
    # The count is drawn before the sample, then one score per sampled name.
    dim_count = rng.randint(2, 4)
    scores = {name: rng.uniform(0.2, 0.95) for name in rng.sample(available_dims, dim_count)}
    dimension_parts = [f"{name}={score:.4f}" for name, score in scores.items()]

    delta_count = rng.randint(1, 4)
    delta_parts = []
    for _ in range(delta_count):
        delta = rng.uniform(-0.08, 0.06)
        # Non-negative deltas get an explicit "+"; negatives keep their "-".
        sign = "+" if delta >= 0 else ""
        delta_parts.append(f"{sign}{delta:.4f}")

    report = [
        f"Agent: {agent}",
        f"Composite: {composite:.4f}",
        f"Tests: {passing}/{total}",
        f"Trajectory: {trajectory}",
        f"Dimensions: {', '.join(dimension_parts)}",
        f"Recent deltas: {', '.join(delta_parts)}",
    ]
    return "\n".join(report)
169
+
170
+
171
def main():
    """Generate the hard-negative examples and append them to the split files.

    Reads --output (directory, default ".jfl/v2-data") and --seed (default
    123) from the command line, builds one example per HARD_NEGATIVES entry,
    shuffles them with the seeded generator, slices them 70/15/15 into
    train/val/test, and appends each slice as JSON lines to
    <output>/<split>.jsonl. Finishes by printing a per-tool count.
    """
    cli = argparse.ArgumentParser(description="Generate hard negative training examples")
    cli.add_argument("--output", default=".jfl/v2-data", help="Output directory")
    cli.add_argument("--seed", type=int, default=123, help="Random seed")
    opts = cli.parse_args()

    rng = random.Random(opts.seed)

    # One record per (tool, entry) pair, in dictionary order; the shared rng
    # means the generated state text depends on that order.
    records = [
        {
            "current_state": generate_state(agent, composite, rng),
            "goal": goal,
            "correct_tool": tool_name,
            "source": "hard_negative",
        }
        for tool_name, entries in HARD_NEGATIVES.items()
        for goal, agent, composite in entries
    ]
    rng.shuffle(records)

    total = len(records)
    train_cut = int(total * 0.7)
    val_cut = int(total * 0.85)
    partitions = (
        ("train", records[:train_cut]),
        ("val", records[train_cut:val_cut]),
        ("test", records[val_cut:]),
    )

    os.makedirs(opts.output, exist_ok=True)

    for label, rows in partitions:
        target = os.path.join(opts.output, f"{label}.jsonl")
        # Append mode: existing split files are extended, not overwritten.
        with open(target, "a") as handle:
            for row in rows:
                handle.write(json.dumps(row) + "\n")
        print(f" {label}: +{len(rows)} hard negatives → {target}")

    from collections import Counter
    per_tool = Counter(row["correct_tool"] for row in records)
    print(f"\nGenerated {total} hard negative examples:")
    for tool, count in per_tool.most_common():
        print(f" {tool:25s} {count:4d}")
216
+
217
+
218
+ if __name__ == "__main__":
219
+ main()
@@ -1,17 +1,23 @@
1
1
  """
2
2
  v2 Policy Head Inference — CLI script for action selection.
3
3
 
4
+ Uses precomputed embedding cache for fast inference (<500ms).
5
+ Falls back to Stratus API for cache misses only.
6
+
4
7
  Usage:
5
- python infer.py --checkpoint .jfl/checkpoints/best_policy_head.pt --state "..." --goal "..." --top-k 3
8
+ python infer.py --checkpoint path/to/best_policy_head.pt --state "..." --goal "..." --top-k 3 --json
9
+ python infer.py --checkpoint ... --batch (read JSONL from stdin)
6
10
 
7
- Also supports JSON mode for TypeScript bridge:
8
- python infer.py --checkpoint ... --state "..." --goal "..." --json
11
+ Cache:
12
+ Looks for embeddings_cache.npz + text_to_idx.json next to checkpoint or in --cache-dir.
13
+ Cache hit: <50ms total. Cache miss: ~5s per miss (Stratus API call).
9
14
  """
10
15
 
11
16
  import json
12
17
  import os
13
18
  import sys
14
19
  import argparse
20
+ import time
15
21
 
16
22
  import torch
17
23
  import numpy as np
@@ -19,6 +25,80 @@ import numpy as np
19
25
  from model import PolicyHead
20
26
 
21
27
 
28
+ # ============================================================================
29
+ # Embedding Cache
30
+ # ============================================================================
31
+
32
+ class EmbeddingCache:
33
+ """Fast embedding lookup from precomputed cache, with Stratus fallback."""
34
+
35
+ def __init__(self, cache_dir: str = None, api_url: str = None, api_key: str = None):
36
+ self.embeddings = None
37
+ self.text_to_idx = {}
38
+ self.api_url = api_url or os.environ.get("STRATUS_API_URL", "https://api.stratus.run")
39
+ self.api_key = api_key or os.environ.get("STRATUS_API_KEY", "")
40
+ self.hits = 0
41
+ self.misses = 0
42
+
43
+ if cache_dir:
44
+ self._load_cache(cache_dir)
45
+
46
+ def _load_cache(self, cache_dir: str):
47
+ npz_path = os.path.join(cache_dir, "embeddings_cache.npz")
48
+ idx_path = os.path.join(cache_dir, "text_to_idx.json")
49
+
50
+ if os.path.exists(npz_path) and os.path.exists(idx_path):
51
+ data = np.load(npz_path)
52
+ self.embeddings = data["embeddings"]
53
+ self.text_to_idx = json.load(open(idx_path))
54
+ print(f"Loaded embedding cache: {len(self.text_to_idx)} texts, {self.embeddings.shape[1]}-dim", file=sys.stderr)
55
+ else:
56
+ print(f"No embedding cache at {cache_dir}", file=sys.stderr)
57
+
58
+ def get(self, text: str) -> list[float]:
59
+ """Get embedding for text. Cache hit = instant, miss = API call."""
60
+ # Try cache first
61
+ if self.text_to_idx and text in self.text_to_idx:
62
+ idx = self.text_to_idx[text]
63
+ self.hits += 1
64
+ return self.embeddings[idx].tolist()
65
+
66
+ # Cache miss — try API
67
+ self.misses += 1
68
+ if not self.api_key:
69
+ # No API key and no cache hit — return zero vector
70
+ dim = self.embeddings.shape[1] if self.embeddings is not None else 768
71
+ print(f"WARN: Cache miss + no API key for text: {text[:60]}...", file=sys.stderr)
72
+ return [0.0] * dim
73
+
74
+ return self._api_embed(text)
75
+
76
+ def _api_embed(self, text: str) -> list[float]:
77
+ import requests
78
+ response = requests.post(
79
+ f"{self.api_url}/v1/embeddings",
80
+ headers={
81
+ "Authorization": f"Bearer {self.api_key}",
82
+ "Content-Type": "application/json",
83
+ },
84
+ json={
85
+ "model": "stratus-x1ac-base",
86
+ "input": text,
87
+ },
88
+ timeout=15,
89
+ )
90
+ response.raise_for_status()
91
+ data = response.json()
92
+ return data["data"][0]["embedding"]
93
+
94
+ def stats(self) -> dict:
95
+ return {"hits": self.hits, "misses": self.misses, "cache_size": len(self.text_to_idx)}
96
+
97
+
98
+ # ============================================================================
99
+ # Model Loading
100
+ # ============================================================================
101
+
22
102
  def load_model(checkpoint_path: str, device: str = "cpu"):
23
103
  ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
24
104
  config = ckpt["config"]
@@ -40,27 +120,39 @@ def load_model(checkpoint_path: str, device: str = "cpu"):
40
120
  return model, tool_to_index, index_to_tool, config
41
121
 
42
122
 
43
- def get_embedding(text: str, api_url: str, api_key: str) -> list[float]:
44
- import requests
45
-
46
- response = requests.post(
47
- f"{api_url}/v1/embeddings",
48
- headers={
49
- "Authorization": f"Bearer {api_key}",
50
- "Content-Type": "application/json",
51
- },
52
- json={
53
- "model": "stratus-x1ac-base",
54
- "input": text,
55
- },
56
- timeout=15,
57
- )
58
- response.raise_for_status()
59
- data = response.json()
60
- return data["data"][0]["embedding"]
123
+ def find_cache_dir(checkpoint_path: str) -> str | None:
124
+ """Find embedding cache directory. Checks multiple locations."""
125
+ import pathlib
126
+ ckpt_dir = pathlib.Path(checkpoint_path).parent
127
+
128
+ # Check next to checkpoint: .jfl/checkpoints/ → .jfl/v2-data/
129
+ candidates = [
130
+ ckpt_dir.parent / "v2-data", # .jfl/v2-data/
131
+ ckpt_dir / "v2-data", # .jfl/checkpoints/v2-data/
132
+ pathlib.Path.cwd() / ".jfl" / "v2-data", # cwd/.jfl/v2-data/
133
+ ]
134
+
135
+ # Also check EMBEDDING_CACHE_DIR env var
136
+ env_dir = os.environ.get("EMBEDDING_CACHE_DIR")
137
+ if env_dir:
138
+ candidates.insert(0, pathlib.Path(env_dir))
139
+
140
+ for candidate in candidates:
141
+ npz = candidate / "embeddings_cache.npz"
142
+ idx = candidate / "text_to_idx.json"
143
+ if npz.exists() and idx.exists():
144
+ return str(candidate)
145
+
146
+ return None
147
+
61
148
 
149
+ # ============================================================================
150
+ # Inference
151
+ # ============================================================================
62
152
 
63
153
  def infer(args):
154
+ t0 = time.time()
155
+
64
156
  if torch.cuda.is_available():
65
157
  device = "cuda"
66
158
  elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
@@ -69,16 +161,21 @@ def infer(args):
69
161
  device = "cpu"
70
162
 
71
163
  model, tool_to_index, index_to_tool, config = load_model(args.checkpoint, device)
164
+ t_model = time.time()
165
+
166
+ # Load embedding cache
167
+ cache_dir = args.cache_dir or find_cache_dir(args.checkpoint)
168
+ cache = EmbeddingCache(
169
+ cache_dir=cache_dir,
170
+ api_url=os.environ.get("STRATUS_API_URL", "https://api.stratus.run"),
171
+ api_key=os.environ.get("STRATUS_API_KEY", ""),
172
+ )
173
+ t_cache = time.time()
72
174
 
73
- api_url = os.environ.get("STRATUS_API_URL", "https://api.stratus.run")
74
- api_key = os.environ.get("STRATUS_API_KEY", "")
75
-
76
- if not api_key:
77
- print("STRATUS_API_KEY not set", file=sys.stderr)
78
- sys.exit(1)
79
-
80
- state_emb = get_embedding(args.state, api_url, api_key)
81
- goal_emb = get_embedding(args.goal, api_url, api_key)
175
+ # Get embeddings (cache hit = instant, miss = API call)
176
+ state_emb = cache.get(args.state)
177
+ goal_emb = cache.get(args.goal)
178
+ t_embed = time.time()
82
179
 
83
180
  state_tensor = torch.tensor([state_emb], dtype=torch.float32).to(device)
84
181
  goal_tensor = torch.tensor([goal_emb], dtype=torch.float32).to(device)
@@ -87,6 +184,7 @@ def infer(args):
87
184
 
88
185
  top_indices = result["top_k_indices"][0].cpu().tolist()
89
186
  top_probs = result["top_k_probs"][0].cpu().tolist()
187
+ t_infer = time.time()
90
188
 
91
189
  predictions = []
92
190
  for idx, prob in zip(top_indices, top_probs):
@@ -101,10 +199,16 @@ def infer(args):
101
199
  "confidence": predictions[0]["confidence"],
102
200
  "alternatives": predictions[1:],
103
201
  }
202
+ # Include timing in stderr for debugging
203
+ stats = cache.stats()
204
+ timing = f"model={t_model-t0:.1f}s cache={t_cache-t_model:.1f}s embed={t_embed-t_cache:.1f}s infer={t_infer-t_embed:.1f}s total={t_infer-t0:.1f}s hits={stats['hits']} misses={stats['misses']}"
205
+ print(f"timing: {timing}", file=sys.stderr)
104
206
  print(json.dumps(output))
105
207
  else:
106
- print(f"\nv2 Policy Head Prediction")
107
- print(f"{'─' * 40}")
208
+ t_total = t_infer - t0
209
+ stats = cache.stats()
210
+ print(f"\nv2 Policy Head Prediction ({t_total:.2f}s, {stats['hits']} cache hits, {stats['misses']} misses)")
211
+ print(f"{'─' * 50}")
108
212
  print(f"State: {args.state[:80]}...")
109
213
  print(f"Goal: {args.goal[:80]}...")
110
214
  print(f"\nTop {args.top_k} actions:")
@@ -125,8 +229,13 @@ def batch_infer(args):
125
229
 
126
230
  model, tool_to_index, index_to_tool, config = load_model(args.checkpoint, device)
127
231
 
128
- api_url = os.environ.get("STRATUS_API_URL", "https://api.stratus.run")
129
- api_key = os.environ.get("STRATUS_API_KEY", "")
232
+ # Load embedding cache
233
+ cache_dir = args.cache_dir or find_cache_dir(args.checkpoint)
234
+ cache = EmbeddingCache(
235
+ cache_dir=cache_dir,
236
+ api_url=os.environ.get("STRATUS_API_URL", "https://api.stratus.run"),
237
+ api_key=os.environ.get("STRATUS_API_KEY", ""),
238
+ )
130
239
 
131
240
  for line in sys.stdin:
132
241
  line = line.strip()
@@ -135,8 +244,8 @@ def batch_infer(args):
135
244
 
136
245
  try:
137
246
  req = json.loads(line)
138
- state_emb = get_embedding(req["state"], api_url, api_key)
139
- goal_emb = get_embedding(req["goal"], api_url, api_key)
247
+ state_emb = cache.get(req["state"])
248
+ goal_emb = cache.get(req["goal"])
140
249
 
141
250
  state_tensor = torch.tensor([state_emb], dtype=torch.float32).to(device)
142
251
  goal_tensor = torch.tensor([goal_emb], dtype=torch.float32).to(device)
@@ -164,6 +273,9 @@ def batch_infer(args):
164
273
  print(json.dumps({"error": str(e)}))
165
274
  sys.stdout.flush()
166
275
 
276
+ stats = cache.stats()
277
+ print(f"Batch complete: {stats['hits']} hits, {stats['misses']} misses", file=sys.stderr)
278
+
167
279
 
168
280
  def main():
169
281
  parser = argparse.ArgumentParser(description="v2 policy head inference")
@@ -173,6 +285,7 @@ def main():
173
285
  parser.add_argument("--top-k", type=int, default=3, help="Number of top actions")
174
286
  parser.add_argument("--json", action="store_true", help="JSON output for TypeScript bridge")
175
287
  parser.add_argument("--batch", action="store_true", help="Batch mode: read JSONL from stdin")
288
+ parser.add_argument("--cache-dir", default=None, help="Directory with embeddings_cache.npz + text_to_idx.json")
176
289
  args = parser.parse_args()
177
290
 
178
291
  if args.batch: