jfl 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/dist/commands/doctor.d.ts +1 -0
  2. package/dist/commands/doctor.d.ts.map +1 -1
  3. package/dist/commands/doctor.js +30 -1
  4. package/dist/commands/doctor.js.map +1 -1
  5. package/dist/commands/ide.d.ts +2 -1
  6. package/dist/commands/ide.d.ts.map +1 -1
  7. package/dist/commands/ide.js +60 -1
  8. package/dist/commands/ide.js.map +1 -1
  9. package/dist/commands/init-from-service.d.ts +15 -0
  10. package/dist/commands/init-from-service.d.ts.map +1 -0
  11. package/dist/commands/init-from-service.js +541 -0
  12. package/dist/commands/init-from-service.js.map +1 -0
  13. package/dist/commands/init.d.ts +1 -0
  14. package/dist/commands/init.d.ts.map +1 -1
  15. package/dist/commands/init.js +32 -1
  16. package/dist/commands/init.js.map +1 -1
  17. package/dist/commands/kanban.d.ts.map +1 -1
  18. package/dist/commands/kanban.js +13 -4
  19. package/dist/commands/kanban.js.map +1 -1
  20. package/dist/commands/linear.d.ts +41 -0
  21. package/dist/commands/linear.d.ts.map +1 -0
  22. package/dist/commands/linear.js +715 -0
  23. package/dist/commands/linear.js.map +1 -0
  24. package/dist/commands/peter.d.ts.map +1 -1
  25. package/dist/commands/peter.js +232 -25
  26. package/dist/commands/peter.js.map +1 -1
  27. package/dist/commands/services.d.ts.map +1 -1
  28. package/dist/commands/services.js +146 -0
  29. package/dist/commands/services.js.map +1 -1
  30. package/dist/commands/setup.d.ts.map +1 -1
  31. package/dist/commands/setup.js +173 -13
  32. package/dist/commands/setup.js.map +1 -1
  33. package/dist/commands/telemetry-monitor.d.ts +11 -0
  34. package/dist/commands/telemetry-monitor.d.ts.map +1 -0
  35. package/dist/commands/telemetry-monitor.js +224 -0
  36. package/dist/commands/telemetry-monitor.js.map +1 -0
  37. package/dist/commands/telemetry-test.d.ts +11 -0
  38. package/dist/commands/telemetry-test.d.ts.map +1 -0
  39. package/dist/commands/telemetry-test.js +67 -0
  40. package/dist/commands/telemetry-test.js.map +1 -0
  41. package/dist/commands/tenet-agents.d.ts +13 -0
  42. package/dist/commands/tenet-agents.d.ts.map +1 -0
  43. package/dist/commands/tenet-agents.js +191 -0
  44. package/dist/commands/tenet-agents.js.map +1 -0
  45. package/dist/commands/tenet-setup.d.ts +19 -0
  46. package/dist/commands/tenet-setup.d.ts.map +1 -0
  47. package/dist/commands/tenet-setup.js +131 -0
  48. package/dist/commands/tenet-setup.js.map +1 -0
  49. package/dist/commands/train.d.ts +18 -0
  50. package/dist/commands/train.d.ts.map +1 -1
  51. package/dist/commands/train.js +182 -0
  52. package/dist/commands/train.js.map +1 -1
  53. package/dist/commands/whoami.d.ts +2 -0
  54. package/dist/commands/whoami.d.ts.map +1 -0
  55. package/dist/commands/whoami.js +24 -0
  56. package/dist/commands/whoami.js.map +1 -0
  57. package/dist/index.js +159 -10
  58. package/dist/index.js.map +1 -1
  59. package/dist/lib/advanced-setup.d.ts +78 -0
  60. package/dist/lib/advanced-setup.d.ts.map +1 -0
  61. package/dist/lib/advanced-setup.js +433 -0
  62. package/dist/lib/advanced-setup.js.map +1 -0
  63. package/dist/lib/agent-config.d.ts +33 -0
  64. package/dist/lib/agent-config.d.ts.map +1 -1
  65. package/dist/lib/agent-config.js +26 -0
  66. package/dist/lib/agent-config.js.map +1 -1
  67. package/dist/lib/counterfactual-training-bridge.d.ts +114 -0
  68. package/dist/lib/counterfactual-training-bridge.d.ts.map +1 -0
  69. package/dist/lib/counterfactual-training-bridge.js +322 -0
  70. package/dist/lib/counterfactual-training-bridge.js.map +1 -0
  71. package/dist/lib/discovery-agent.d.ts +48 -0
  72. package/dist/lib/discovery-agent.d.ts.map +1 -0
  73. package/dist/lib/discovery-agent.js +111 -0
  74. package/dist/lib/discovery-agent.js.map +1 -0
  75. package/dist/lib/flow-engine.d.ts.map +1 -1
  76. package/dist/lib/flow-engine.js +46 -8
  77. package/dist/lib/flow-engine.js.map +1 -1
  78. package/dist/lib/gtm-generator.d.ts +29 -0
  79. package/dist/lib/gtm-generator.d.ts.map +1 -0
  80. package/dist/lib/gtm-generator.js +252 -0
  81. package/dist/lib/gtm-generator.js.map +1 -0
  82. package/dist/lib/hub-health.d.ts +40 -0
  83. package/dist/lib/hub-health.d.ts.map +1 -0
  84. package/dist/lib/hub-health.js +89 -0
  85. package/dist/lib/hub-health.js.map +1 -0
  86. package/dist/lib/invariant-monitor.d.ts +6 -2
  87. package/dist/lib/invariant-monitor.d.ts.map +1 -1
  88. package/dist/lib/invariant-monitor.js +89 -2
  89. package/dist/lib/invariant-monitor.js.map +1 -1
  90. package/dist/lib/journal-analyzer.d.ts +71 -0
  91. package/dist/lib/journal-analyzer.d.ts.map +1 -0
  92. package/dist/lib/journal-analyzer.js +306 -0
  93. package/dist/lib/journal-analyzer.js.map +1 -0
  94. package/dist/lib/linear-client.d.ts +73 -0
  95. package/dist/lib/linear-client.d.ts.map +1 -0
  96. package/dist/lib/linear-client.js +112 -0
  97. package/dist/lib/linear-client.js.map +1 -0
  98. package/dist/lib/linear-id-map.d.ts +20 -0
  99. package/dist/lib/linear-id-map.d.ts.map +1 -0
  100. package/dist/lib/linear-id-map.js +57 -0
  101. package/dist/lib/linear-id-map.js.map +1 -0
  102. package/dist/lib/linear-kanban.d.ts +66 -0
  103. package/dist/lib/linear-kanban.d.ts.map +1 -0
  104. package/dist/lib/linear-kanban.js +175 -0
  105. package/dist/lib/linear-kanban.js.map +1 -0
  106. package/dist/lib/onboarding.d.ts +40 -0
  107. package/dist/lib/onboarding.d.ts.map +1 -0
  108. package/dist/lib/onboarding.js +213 -0
  109. package/dist/lib/onboarding.js.map +1 -0
  110. package/dist/lib/physical-world-model.d.ts +50 -0
  111. package/dist/lib/physical-world-model.d.ts.map +1 -0
  112. package/dist/lib/physical-world-model.js +251 -0
  113. package/dist/lib/physical-world-model.js.map +1 -0
  114. package/dist/lib/planning-loop.d.ts +157 -0
  115. package/dist/lib/planning-loop.d.ts.map +1 -0
  116. package/dist/lib/planning-loop.js +537 -0
  117. package/dist/lib/planning-loop.js.map +1 -0
  118. package/dist/lib/policy-head.d.ts +13 -0
  119. package/dist/lib/policy-head.d.ts.map +1 -1
  120. package/dist/lib/policy-head.js +168 -2
  121. package/dist/lib/policy-head.js.map +1 -1
  122. package/dist/lib/resource-optimizer-middleware.d.ts +39 -0
  123. package/dist/lib/resource-optimizer-middleware.d.ts.map +1 -0
  124. package/dist/lib/resource-optimizer-middleware.js +222 -0
  125. package/dist/lib/resource-optimizer-middleware.js.map +1 -0
  126. package/dist/lib/resource-optimizer.d.ts +71 -0
  127. package/dist/lib/resource-optimizer.d.ts.map +1 -0
  128. package/dist/lib/resource-optimizer.js +228 -0
  129. package/dist/lib/resource-optimizer.js.map +1 -0
  130. package/dist/lib/rl-manager.d.ts +74 -0
  131. package/dist/lib/rl-manager.d.ts.map +1 -0
  132. package/dist/lib/rl-manager.js +244 -0
  133. package/dist/lib/rl-manager.js.map +1 -0
  134. package/dist/lib/service-analyzer.d.ts +76 -0
  135. package/dist/lib/service-analyzer.d.ts.map +1 -0
  136. package/dist/lib/service-analyzer.js +704 -0
  137. package/dist/lib/service-analyzer.js.map +1 -0
  138. package/dist/lib/service-gtm.js +2 -2
  139. package/dist/lib/service-gtm.js.map +1 -1
  140. package/dist/lib/service-questionnaire.d.ts +11 -0
  141. package/dist/lib/service-questionnaire.d.ts.map +1 -0
  142. package/dist/lib/service-questionnaire.js +89 -0
  143. package/dist/lib/service-questionnaire.js.map +1 -0
  144. package/dist/lib/setup/agent-generator.d.ts +2 -0
  145. package/dist/lib/setup/agent-generator.d.ts.map +1 -1
  146. package/dist/lib/setup/agent-generator.js +128 -4
  147. package/dist/lib/setup/agent-generator.js.map +1 -1
  148. package/dist/lib/setup/flow-generator.d.ts +10 -0
  149. package/dist/lib/setup/flow-generator.d.ts.map +1 -0
  150. package/dist/lib/setup/flow-generator.js +113 -0
  151. package/dist/lib/setup/flow-generator.js.map +1 -0
  152. package/dist/lib/setup/invariant-bridge.d.ts +91 -0
  153. package/dist/lib/setup/invariant-bridge.d.ts.map +1 -0
  154. package/dist/lib/setup/invariant-bridge.js +384 -0
  155. package/dist/lib/setup/invariant-bridge.js.map +1 -0
  156. package/dist/lib/setup/spec-generator.d.ts +41 -5
  157. package/dist/lib/setup/spec-generator.d.ts.map +1 -1
  158. package/dist/lib/setup/spec-generator.js +503 -29
  159. package/dist/lib/setup/spec-generator.js.map +1 -1
  160. package/dist/lib/stratus-client.js +1 -1
  161. package/dist/lib/stratus-client.js.map +1 -1
  162. package/dist/lib/surface-agent.d.ts +78 -0
  163. package/dist/lib/surface-agent.d.ts.map +1 -0
  164. package/dist/lib/surface-agent.js +105 -0
  165. package/dist/lib/surface-agent.js.map +1 -0
  166. package/dist/lib/surface-coordination-example.d.ts +30 -0
  167. package/dist/lib/surface-coordination-example.d.ts.map +1 -0
  168. package/dist/lib/surface-coordination-example.js +164 -0
  169. package/dist/lib/surface-coordination-example.js.map +1 -0
  170. package/dist/lib/telemetry/physical-world-collector.d.ts +15 -0
  171. package/dist/lib/telemetry/physical-world-collector.d.ts.map +1 -0
  172. package/dist/lib/telemetry/physical-world-collector.js +177 -0
  173. package/dist/lib/telemetry/physical-world-collector.js.map +1 -0
  174. package/dist/lib/telemetry/training-bridge.d.ts +51 -0
  175. package/dist/lib/telemetry/training-bridge.d.ts.map +1 -0
  176. package/dist/lib/telemetry/training-bridge.js +185 -0
  177. package/dist/lib/telemetry/training-bridge.js.map +1 -0
  178. package/dist/lib/telemetry.d.ts +2 -1
  179. package/dist/lib/telemetry.d.ts.map +1 -1
  180. package/dist/lib/telemetry.js +23 -2
  181. package/dist/lib/telemetry.js.map +1 -1
  182. package/dist/lib/tenet-board-agent.d.ts +52 -0
  183. package/dist/lib/tenet-board-agent.d.ts.map +1 -0
  184. package/dist/lib/tenet-board-agent.js +226 -0
  185. package/dist/lib/tenet-board-agent.js.map +1 -0
  186. package/dist/lib/tenet-ide-agent.d.ts +40 -0
  187. package/dist/lib/tenet-ide-agent.d.ts.map +1 -0
  188. package/dist/lib/tenet-ide-agent.js +199 -0
  189. package/dist/lib/tenet-ide-agent.js.map +1 -0
  190. package/dist/lib/workspace/data-pipeline.d.ts.map +1 -1
  191. package/dist/lib/workspace/data-pipeline.js +27 -5
  192. package/dist/lib/workspace/data-pipeline.js.map +1 -1
  193. package/dist/lib/workspace/sidebar-runner.d.ts +13 -0
  194. package/dist/lib/workspace/sidebar-runner.d.ts.map +1 -0
  195. package/dist/lib/workspace/sidebar-runner.js +419 -0
  196. package/dist/lib/workspace/sidebar-runner.js.map +1 -0
  197. package/dist/lib/workspace/surface-registry.d.ts.map +1 -1
  198. package/dist/lib/workspace/surface-registry.js +4 -1
  199. package/dist/lib/workspace/surface-registry.js.map +1 -1
  200. package/dist/lib/workspace/surfaces/agent-overview.d.ts +3 -3
  201. package/dist/lib/workspace/surfaces/agent-overview.d.ts.map +1 -1
  202. package/dist/lib/workspace/surfaces/agent-overview.js +3 -3
  203. package/dist/lib/workspace/surfaces/agent-overview.js.map +1 -1
  204. package/dist/lib/workspace/surfaces/index.d.ts +3 -0
  205. package/dist/lib/workspace/surfaces/index.d.ts.map +1 -1
  206. package/dist/lib/workspace/surfaces/index.js +3 -0
  207. package/dist/lib/workspace/surfaces/index.js.map +1 -1
  208. package/dist/lib/workspace/surfaces/kanban.d.ts +15 -0
  209. package/dist/lib/workspace/surfaces/kanban.d.ts.map +1 -0
  210. package/dist/lib/workspace/surfaces/kanban.js +43 -0
  211. package/dist/lib/workspace/surfaces/kanban.js.map +1 -0
  212. package/dist/lib/workspace/surfaces/physical-world.d.ts +15 -0
  213. package/dist/lib/workspace/surfaces/physical-world.d.ts.map +1 -0
  214. package/dist/lib/workspace/surfaces/physical-world.js +37 -0
  215. package/dist/lib/workspace/surfaces/physical-world.js.map +1 -0
  216. package/dist/lib/workspace/surfaces/sidebar.d.ts +22 -0
  217. package/dist/lib/workspace/surfaces/sidebar.d.ts.map +1 -0
  218. package/dist/lib/workspace/surfaces/sidebar.js +90 -0
  219. package/dist/lib/workspace/surfaces/sidebar.js.map +1 -0
  220. package/dist/types/flows.d.ts +2 -1
  221. package/dist/types/flows.d.ts.map +1 -1
  222. package/dist/types/physical-world-model.d.ts +65 -0
  223. package/dist/types/physical-world-model.d.ts.map +1 -0
  224. package/dist/types/physical-world-model.js +43 -0
  225. package/dist/types/physical-world-model.js.map +1 -0
  226. package/dist/types/telemetry.d.ts +37 -0
  227. package/dist/types/telemetry.d.ts.map +1 -1
  228. package/dist/types/world-model.d.ts.map +1 -1
  229. package/dist/types/world-model.js +14 -7
  230. package/dist/types/world-model.js.map +1 -1
  231. package/dist/utils/context-hub-port.d.ts.map +1 -1
  232. package/dist/utils/context-hub-port.js +6 -1
  233. package/dist/utils/context-hub-port.js.map +1 -1
  234. package/package.json +3 -2
  235. package/packages/pi/extensions/index.ts +34 -6
  236. package/scripts/telemetry-dashboard.sh +44 -0
  237. package/scripts/test-planning-loop-e2e.ts +181 -0
  238. package/scripts/test-server-inference.ts +49 -0
  239. package/scripts/test-state-sensitivity.ts +32 -0
  240. package/scripts/train/v2/benchmark.py +661 -0
  241. package/scripts/train/v2/generate_balanced.py +439 -0
  242. package/scripts/train/v2/generate_hard_negatives.py +219 -0
  243. package/scripts/train/v2/infer.py +149 -36
  244. package/scripts/train/v2/infer_server.py +224 -0
  245. package/scripts/train/v2/online_train.py +576 -0
  246. package/scripts/train/v2/precompute.py +24 -6
  247. package/template/CLAUDE.md +74 -132
@@ -0,0 +1,224 @@
1
+ """
2
+ v2 Policy Head Inference Server — keeps model + cache warm in memory.
3
+
4
+ Communicates via stdin/stdout JSONL (one request per line, one response per line).
5
+ Eliminates ~3s model loading + ~0.1s cache loading per inference call.
6
+
7
+ Start: python infer_server.py --checkpoint path/to/best_policy_head.pt
8
+ Send: {"state": "...", "goal": "...", "top_k": 3}
9
+ Recv: {"action": "fix_bug", "confidence": 0.94, "alternatives": [...]}
10
+
11
+ Special commands:
12
+ {"cmd": "ping"} → {"status": "ok", "inferences": N, "cache_hits": N, "cache_misses": N, "live_cache_size": N}
13
+ {"cmd": "quit"} → server exits
14
+ """
15
+
16
+ import json
17
+ import os
18
+ import sys
19
+ import time
20
+
21
+ import torch
22
+ import numpy as np
23
+
24
+ # Add script directory to path for model import
25
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
26
+ from model import PolicyHead
27
+
28
+
29
class InferenceServer:
    """Long-lived policy-head inference server speaking JSONL on stdin/stdout.

    The checkpoint and the optional precomputed embedding cache are loaded
    once in ``__init__``; ``run`` then answers one JSON request per input
    line with exactly one JSON response line on stdout. Diagnostics that are
    not responses (embedding API failures) are written to stderr so they
    cannot be mistaken for a response.
    """

    def __init__(self, checkpoint_path: str, cache_dir: "str | None" = None):
        """Load the model and embedding cache, then announce readiness.

        Args:
            checkpoint_path: Path to a checkpoint readable by ``torch.load``
                that contains the keys ``config``, ``tool_to_index``,
                ``num_tools`` and ``model_state_dict``.
            cache_dir: Directory expected to hold ``embeddings_cache.npz``
                and ``text_to_idx.json``. When falsy, or when either file is
                missing, the server starts with an empty file cache.

        Side effects:
            Prints one ``{"status": "ready", ...}`` JSON line to stdout.
        """
        # Counters reported by the "ping" command.
        self.hits = 0        # lookups served from the file cache or live cache
        self.misses = 0      # lookups that fell through both caches
        self.inferences = 0  # completed infer() calls

        self.device = self._select_device()

        # Load model
        t0 = time.time()
        # NOTE(security): weights_only=False lets torch.load unpickle arbitrary
        # objects, which can execute code. Only point --checkpoint at files
        # from a trusted source.
        ckpt = torch.load(checkpoint_path, map_location=self.device, weights_only=False)
        config = ckpt["config"]
        self.tool_to_index = ckpt["tool_to_index"]
        # Inverse mapping used to turn predicted indices back into tool names.
        self.index_to_tool = {v: k for k, v in self.tool_to_index.items()}

        self.model = PolicyHead(
            embedding_dim=config["embedding_dim"],
            hidden_dim=config["hidden_dim"],
            num_tools=ckpt["num_tools"],
            num_layers=config["num_layers"],
            num_heads=config["num_heads"],
            dropout=config.get("dropout", 0.1),
        ).to(self.device)
        self.model.load_state_dict(ckpt["model_state_dict"])
        self.model.eval()
        self.embedding_dim = config["embedding_dim"]
        t_model = time.time()

        # Load embedding cache (precomputed text -> row index into a matrix).
        self.text_to_idx = {}
        self.embeddings = None
        if cache_dir:
            self._load_embedding_cache(cache_dir)
        t_cache = time.time()

        # In-memory cache for API-fetched embeddings (survives across requests).
        # It is never evicted, so it grows with the number of distinct texts.
        self.live_cache: dict[str, list[float]] = {}

        # Stratus API config; an empty key disables API lookups entirely.
        self.api_url = os.environ.get("STRATUS_API_URL", "https://api.stratus.run")
        self.api_key = os.environ.get("STRATUS_API_KEY", "")

        print(json.dumps({
            "status": "ready",
            "device": self.device,
            "cache_size": len(self.text_to_idx),
            "model_load_ms": int((t_model - t0) * 1000),
            "cache_load_ms": int((t_cache - t_model) * 1000),
        }), flush=True)

    @staticmethod
    def _select_device() -> str:
        """Return "cuda", "mps" or "cpu", in that order of preference."""
        if torch.cuda.is_available():
            return "cuda"
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
        return "cpu"

    def _load_embedding_cache(self, cache_dir: str) -> None:
        """Populate ``embeddings`` / ``text_to_idx`` if both cache files exist."""
        npz_path = os.path.join(cache_dir, "embeddings_cache.npz")
        idx_path = os.path.join(cache_dir, "text_to_idx.json")
        if not (os.path.exists(npz_path) and os.path.exists(idx_path)):
            return
        # Context managers close both files; the original left them open.
        with np.load(npz_path) as data:
            self.embeddings = data["embeddings"]
        with open(idx_path, encoding="utf-8") as fh:
            self.text_to_idx = json.load(fh)

    def get_embedding(self, text: str) -> list[float]:
        """Get embedding: file cache → live cache → API → zero vector.

        Args:
            text: The text to embed.

        Returns:
            The embedding as a list of floats. When no cache has the text and
            the API is disabled or fails, a zero vector of length
            ``self.embedding_dim`` is returned (and is not cached).
        """
        # 1. File cache (precomputed)
        if text in self.text_to_idx:
            self.hits += 1
            return self.embeddings[self.text_to_idx[text]].tolist()

        # 2. Live cache (API results from this session)
        if text in self.live_cache:
            self.hits += 1
            return self.live_cache[text]

        # 3. API call
        self.misses += 1
        if self.api_key:
            try:
                # Imported lazily so the server works without `requests`
                # when every lookup is served from the caches.
                import requests
                response = requests.post(
                    f"{self.api_url}/v1/embeddings",
                    headers={
                        "Authorization": f"Bearer {self.api_key}",
                        "Content-Type": "application/json",
                    },
                    json={"model": "stratus-x1ac-base", "input": text},
                    timeout=15,
                )
                response.raise_for_status()
                emb = response.json()["data"][0]["embedding"]
                self.live_cache[text] = emb  # Cache for future requests
                return emb
            except Exception as e:
                # Best-effort: any failure degrades to the zero vector below.
                # The warning goes to stderr because stdout carries exactly
                # one response line per request; an extra line there would
                # desynchronise a client reading line-per-request.
                print(
                    json.dumps({"warning": f"API embed failed: {str(e)[:100]}"}),
                    file=sys.stderr,
                    flush=True,
                )

        # 4. Zero vector fallback
        return [0.0] * self.embedding_dim

    def infer(self, state_text: str, goal_text: str, top_k: int = 3) -> dict:
        """Predict the top actions for a (state, goal) pair.

        Args:
            state_text: Text describing the current state.
            goal_text: Text describing the goal.
            top_k: Number of ranked predictions requested. Values below 1 are
                raised to 1 so the result always has a primary ``action``.

        Returns:
            A dict with ``action`` and ``confidence`` for the best prediction,
            ``alternatives`` (the remaining predictions, same shape), and
            ``_timing_ms`` with ``embed`` / ``infer`` / ``total`` durations.
        """
        # A non-positive top_k used to produce no predictions and then fail
        # with an opaque IndexError on predictions[0].
        # NOTE(review): the upper bound is left to model.predict — confirm how
        # it behaves when top_k exceeds the number of tools.
        top_k = max(1, int(top_k))

        t0 = time.time()
        state_emb = self.get_embedding(state_text)
        goal_emb = self.get_embedding(goal_text)
        t_embed = time.time()

        # Wrap each embedding in a list to add a leading batch dimension of 1.
        state_tensor = torch.tensor([state_emb], dtype=torch.float32).to(self.device)
        goal_tensor = torch.tensor([goal_emb], dtype=torch.float32).to(self.device)

        with torch.no_grad():
            result = self.model.predict(state_tensor, goal_tensor, top_k=top_k)

        top_indices = result["top_k_indices"][0].cpu().tolist()
        top_probs = result["top_k_probs"][0].cpu().tolist()
        t_infer = time.time()

        self.inferences += 1

        predictions = [
            {"action": self.index_to_tool[idx], "confidence": round(prob, 4)}
            for idx, prob in zip(top_indices, top_probs)
        ]
        best = predictions[0]

        return {
            "action": best["action"],
            "confidence": best["confidence"],
            "alternatives": predictions[1:],
            "_timing_ms": {
                "embed": int((t_embed - t0) * 1000),
                "infer": int((t_infer - t_embed) * 1000),
                "total": int((t_infer - t0) * 1000),
            },
        }

    def _handle_command(self, cmd) -> dict:
        """Build the response for a special command other than "quit"."""
        if cmd == "ping":
            return {
                "status": "ok",
                "inferences": self.inferences,
                "cache_hits": self.hits,
                "cache_misses": self.misses,
                "live_cache_size": len(self.live_cache),
            }
        return {"error": f"Unknown command: {cmd}"}

    def run(self):
        """Main loop: read JSONL from stdin, write JSONL to stdout.

        Blank lines are skipped. ``{"cmd": "quit"}`` ends the loop without a
        response; every other request produces exactly one response line.
        Any failure while handling a request is reported as
        ``{"error": "..."}`` and the loop continues.
        """
        for raw in sys.stdin:
            line = raw.strip()
            if not line:
                continue

            try:
                req = json.loads(line)
                if not isinstance(req, dict):
                    # Lists, strings and numbers are valid JSON but not valid
                    # requests; say so instead of failing on req.get below.
                    raise ValueError("Request must be a JSON object")

                # Handle special commands
                if "cmd" in req:
                    if req["cmd"] == "quit":
                        break
                    print(json.dumps(self._handle_command(req["cmd"])), flush=True)
                    continue

                # Regular inference
                result = self.infer(
                    req.get("state", ""),
                    req.get("goal", ""),
                    req.get("top_k", 3),
                )
                print(json.dumps(result), flush=True)

            except Exception as e:
                # Deliberately broad: this is the per-request boundary, and a
                # bad request must not take the warm server down.
                print(json.dumps({"error": str(e)}), flush=True)
201
+
202
+
203
def main():
    """Command-line entry point: parse arguments, locate the cache, serve.

    ``--checkpoint`` is required. When ``--cache-dir`` is not given, the
    first of ``<checkpoint dir>/../v2-data`` and ``<cwd>/.jfl/v2-data`` that
    contains ``embeddings_cache.npz`` is used; if neither does, the server
    is started without a cache directory.
    """
    import argparse
    import pathlib

    cli = argparse.ArgumentParser(description="v2 policy head inference server")
    cli.add_argument("--checkpoint", required=True, help="Path to .pt checkpoint")
    cli.add_argument("--cache-dir", default=None, help="Embedding cache directory")
    opts = cli.parse_args()

    cache_dir = opts.cache_dir
    if not cache_dir:
        # Auto-find cache dir: candidates are probed in priority order.
        search_roots = (
            pathlib.Path(opts.checkpoint).parent.parent / "v2-data",
            pathlib.Path.cwd() / ".jfl" / "v2-data",
        )
        located = next(
            (root for root in search_roots if (root / "embeddings_cache.npz").exists()),
            None,
        )
        if located is not None:
            cache_dir = str(located)

    InferenceServer(opts.checkpoint, cache_dir).run()


if __name__ == "__main__":
    main()