@peakinfer/cli 1.0.133

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. package/.claude/settings.local.json +8 -0
  2. package/.env.example +6 -0
  3. package/.github/workflows/peakinfer.yml +64 -0
  4. package/CHANGELOG.md +31 -0
  5. package/LICENSE +190 -0
  6. package/README.md +335 -0
  7. package/data/inferencemax.json +274 -0
  8. package/dist/agent-analyzer.d.ts +45 -0
  9. package/dist/agent-analyzer.d.ts.map +1 -0
  10. package/dist/agent-analyzer.js +374 -0
  11. package/dist/agent-analyzer.js.map +1 -0
  12. package/dist/agent.d.ts +76 -0
  13. package/dist/agent.d.ts.map +1 -0
  14. package/dist/agent.js +965 -0
  15. package/dist/agent.js.map +1 -0
  16. package/dist/agents/correlation-analyzer.d.ts +34 -0
  17. package/dist/agents/correlation-analyzer.d.ts.map +1 -0
  18. package/dist/agents/correlation-analyzer.js +261 -0
  19. package/dist/agents/correlation-analyzer.js.map +1 -0
  20. package/dist/agents/index.d.ts +91 -0
  21. package/dist/agents/index.d.ts.map +1 -0
  22. package/dist/agents/index.js +111 -0
  23. package/dist/agents/index.js.map +1 -0
  24. package/dist/agents/runtime-analyzer.d.ts +38 -0
  25. package/dist/agents/runtime-analyzer.d.ts.map +1 -0
  26. package/dist/agents/runtime-analyzer.js +244 -0
  27. package/dist/agents/runtime-analyzer.js.map +1 -0
  28. package/dist/analysis-types.d.ts +500 -0
  29. package/dist/analysis-types.d.ts.map +1 -0
  30. package/dist/analysis-types.js +11 -0
  31. package/dist/analysis-types.js.map +1 -0
  32. package/dist/analytics.d.ts +25 -0
  33. package/dist/analytics.d.ts.map +1 -0
  34. package/dist/analytics.js +94 -0
  35. package/dist/analytics.js.map +1 -0
  36. package/dist/analyzer.d.ts +48 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +547 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/artifacts.d.ts +44 -0
  41. package/dist/artifacts.d.ts.map +1 -0
  42. package/dist/artifacts.js +165 -0
  43. package/dist/artifacts.js.map +1 -0
  44. package/dist/benchmarks/index.d.ts +88 -0
  45. package/dist/benchmarks/index.d.ts.map +1 -0
  46. package/dist/benchmarks/index.js +205 -0
  47. package/dist/benchmarks/index.js.map +1 -0
  48. package/dist/cli.d.ts +3 -0
  49. package/dist/cli.d.ts.map +1 -0
  50. package/dist/cli.js +427 -0
  51. package/dist/cli.js.map +1 -0
  52. package/dist/commands/ci.d.ts +19 -0
  53. package/dist/commands/ci.d.ts.map +1 -0
  54. package/dist/commands/ci.js +253 -0
  55. package/dist/commands/ci.js.map +1 -0
  56. package/dist/commands/config.d.ts +16 -0
  57. package/dist/commands/config.d.ts.map +1 -0
  58. package/dist/commands/config.js +249 -0
  59. package/dist/commands/config.js.map +1 -0
  60. package/dist/commands/demo.d.ts +15 -0
  61. package/dist/commands/demo.d.ts.map +1 -0
  62. package/dist/commands/demo.js +106 -0
  63. package/dist/commands/demo.js.map +1 -0
  64. package/dist/commands/export.d.ts +14 -0
  65. package/dist/commands/export.d.ts.map +1 -0
  66. package/dist/commands/export.js +209 -0
  67. package/dist/commands/export.js.map +1 -0
  68. package/dist/commands/history.d.ts +15 -0
  69. package/dist/commands/history.d.ts.map +1 -0
  70. package/dist/commands/history.js +389 -0
  71. package/dist/commands/history.js.map +1 -0
  72. package/dist/commands/template.d.ts +14 -0
  73. package/dist/commands/template.d.ts.map +1 -0
  74. package/dist/commands/template.js +341 -0
  75. package/dist/commands/template.js.map +1 -0
  76. package/dist/commands/validate-map.d.ts +12 -0
  77. package/dist/commands/validate-map.d.ts.map +1 -0
  78. package/dist/commands/validate-map.js +274 -0
  79. package/dist/commands/validate-map.js.map +1 -0
  80. package/dist/commands/whatif.d.ts +17 -0
  81. package/dist/commands/whatif.d.ts.map +1 -0
  82. package/dist/commands/whatif.js +206 -0
  83. package/dist/commands/whatif.js.map +1 -0
  84. package/dist/comparison.d.ts +38 -0
  85. package/dist/comparison.d.ts.map +1 -0
  86. package/dist/comparison.js +223 -0
  87. package/dist/comparison.js.map +1 -0
  88. package/dist/config.d.ts +42 -0
  89. package/dist/config.d.ts.map +1 -0
  90. package/dist/config.js +158 -0
  91. package/dist/config.js.map +1 -0
  92. package/dist/connectors/helicone.d.ts +9 -0
  93. package/dist/connectors/helicone.d.ts.map +1 -0
  94. package/dist/connectors/helicone.js +106 -0
  95. package/dist/connectors/helicone.js.map +1 -0
  96. package/dist/connectors/index.d.ts +37 -0
  97. package/dist/connectors/index.d.ts.map +1 -0
  98. package/dist/connectors/index.js +65 -0
  99. package/dist/connectors/index.js.map +1 -0
  100. package/dist/connectors/langsmith.d.ts +9 -0
  101. package/dist/connectors/langsmith.d.ts.map +1 -0
  102. package/dist/connectors/langsmith.js +122 -0
  103. package/dist/connectors/langsmith.js.map +1 -0
  104. package/dist/connectors/types.d.ts +83 -0
  105. package/dist/connectors/types.d.ts.map +1 -0
  106. package/dist/connectors/types.js +98 -0
  107. package/dist/connectors/types.js.map +1 -0
  108. package/dist/cost-estimator.d.ts +46 -0
  109. package/dist/cost-estimator.d.ts.map +1 -0
  110. package/dist/cost-estimator.js +104 -0
  111. package/dist/cost-estimator.js.map +1 -0
  112. package/dist/costs.d.ts +57 -0
  113. package/dist/costs.d.ts.map +1 -0
  114. package/dist/costs.js +251 -0
  115. package/dist/costs.js.map +1 -0
  116. package/dist/counterfactuals.d.ts +29 -0
  117. package/dist/counterfactuals.d.ts.map +1 -0
  118. package/dist/counterfactuals.js +448 -0
  119. package/dist/counterfactuals.js.map +1 -0
  120. package/dist/enhancement-prompts.d.ts +41 -0
  121. package/dist/enhancement-prompts.d.ts.map +1 -0
  122. package/dist/enhancement-prompts.js +88 -0
  123. package/dist/enhancement-prompts.js.map +1 -0
  124. package/dist/envelopes.d.ts +20 -0
  125. package/dist/envelopes.d.ts.map +1 -0
  126. package/dist/envelopes.js +790 -0
  127. package/dist/envelopes.js.map +1 -0
  128. package/dist/format-normalizer.d.ts +71 -0
  129. package/dist/format-normalizer.d.ts.map +1 -0
  130. package/dist/format-normalizer.js +1331 -0
  131. package/dist/format-normalizer.js.map +1 -0
  132. package/dist/history.d.ts +79 -0
  133. package/dist/history.d.ts.map +1 -0
  134. package/dist/history.js +313 -0
  135. package/dist/history.js.map +1 -0
  136. package/dist/html.d.ts +11 -0
  137. package/dist/html.d.ts.map +1 -0
  138. package/dist/html.js +463 -0
  139. package/dist/html.js.map +1 -0
  140. package/dist/impact.d.ts +42 -0
  141. package/dist/impact.d.ts.map +1 -0
  142. package/dist/impact.js +443 -0
  143. package/dist/impact.js.map +1 -0
  144. package/dist/index.d.ts +26 -0
  145. package/dist/index.d.ts.map +1 -0
  146. package/dist/index.js +34 -0
  147. package/dist/index.js.map +1 -0
  148. package/dist/insights.d.ts +5 -0
  149. package/dist/insights.d.ts.map +1 -0
  150. package/dist/insights.js +271 -0
  151. package/dist/insights.js.map +1 -0
  152. package/dist/joiner.d.ts +9 -0
  153. package/dist/joiner.d.ts.map +1 -0
  154. package/dist/joiner.js +247 -0
  155. package/dist/joiner.js.map +1 -0
  156. package/dist/orchestrator.d.ts +34 -0
  157. package/dist/orchestrator.d.ts.map +1 -0
  158. package/dist/orchestrator.js +827 -0
  159. package/dist/orchestrator.js.map +1 -0
  160. package/dist/pdf.d.ts +26 -0
  161. package/dist/pdf.d.ts.map +1 -0
  162. package/dist/pdf.js +84 -0
  163. package/dist/pdf.js.map +1 -0
  164. package/dist/prediction.d.ts +33 -0
  165. package/dist/prediction.d.ts.map +1 -0
  166. package/dist/prediction.js +316 -0
  167. package/dist/prediction.js.map +1 -0
  168. package/dist/prompts/loader.d.ts +38 -0
  169. package/dist/prompts/loader.d.ts.map +1 -0
  170. package/dist/prompts/loader.js +60 -0
  171. package/dist/prompts/loader.js.map +1 -0
  172. package/dist/renderer.d.ts +64 -0
  173. package/dist/renderer.d.ts.map +1 -0
  174. package/dist/renderer.js +923 -0
  175. package/dist/renderer.js.map +1 -0
  176. package/dist/runid.d.ts +57 -0
  177. package/dist/runid.d.ts.map +1 -0
  178. package/dist/runid.js +199 -0
  179. package/dist/runid.js.map +1 -0
  180. package/dist/runtime.d.ts +29 -0
  181. package/dist/runtime.d.ts.map +1 -0
  182. package/dist/runtime.js +366 -0
  183. package/dist/runtime.js.map +1 -0
  184. package/dist/scanner.d.ts +11 -0
  185. package/dist/scanner.d.ts.map +1 -0
  186. package/dist/scanner.js +426 -0
  187. package/dist/scanner.js.map +1 -0
  188. package/dist/templates.d.ts +120 -0
  189. package/dist/templates.d.ts.map +1 -0
  190. package/dist/templates.js +429 -0
  191. package/dist/templates.js.map +1 -0
  192. package/dist/tools/index.d.ts +153 -0
  193. package/dist/tools/index.d.ts.map +1 -0
  194. package/dist/tools/index.js +177 -0
  195. package/dist/tools/index.js.map +1 -0
  196. package/dist/types.d.ts +3647 -0
  197. package/dist/types.d.ts.map +1 -0
  198. package/dist/types.js +703 -0
  199. package/dist/types.js.map +1 -0
  200. package/dist/version.d.ts +7 -0
  201. package/dist/version.d.ts.map +1 -0
  202. package/dist/version.js +23 -0
  203. package/dist/version.js.map +1 -0
  204. package/docs/demo-guide.md +423 -0
  205. package/docs/events-format.md +295 -0
  206. package/docs/inferencemap-spec.md +344 -0
  207. package/docs/migration-v2.md +293 -0
  208. package/fixtures/demo/precomputed.json +142 -0
  209. package/fixtures/demo-project/README.md +52 -0
  210. package/fixtures/demo-project/ai-service.ts +65 -0
  211. package/fixtures/demo-project/sample-events.jsonl +15 -0
  212. package/fixtures/demo-project/src/ai-service.ts +128 -0
  213. package/fixtures/demo-project/src/llm-client.ts +155 -0
  214. package/package.json +65 -0
  215. package/prompts/agent-analyzer.yaml +47 -0
  216. package/prompts/ci-gate.yaml +98 -0
  217. package/prompts/correlation-analyzer.yaml +178 -0
  218. package/prompts/format-normalizer.yaml +46 -0
  219. package/prompts/peak-performance.yaml +180 -0
  220. package/prompts/pr-comment.yaml +111 -0
  221. package/prompts/runtime-analyzer.yaml +189 -0
  222. package/prompts/unified-analyzer.yaml +241 -0
  223. package/schemas/inference-map.v0.1.json +215 -0
  224. package/scripts/benchmark.ts +394 -0
  225. package/scripts/demo-v1.5.sh +158 -0
  226. package/scripts/sync-from-site.sh +197 -0
  227. package/scripts/validate-sync.sh +178 -0
  228. package/src/agent-analyzer.ts +481 -0
  229. package/src/agent.ts +1232 -0
  230. package/src/agents/correlation-analyzer.ts +353 -0
  231. package/src/agents/index.ts +235 -0
  232. package/src/agents/runtime-analyzer.ts +343 -0
  233. package/src/analysis-types.ts +558 -0
  234. package/src/analytics.ts +100 -0
  235. package/src/analyzer.ts +692 -0
  236. package/src/artifacts.ts +218 -0
  237. package/src/benchmarks/index.ts +309 -0
  238. package/src/cli.ts +503 -0
  239. package/src/commands/ci.ts +336 -0
  240. package/src/commands/config.ts +288 -0
  241. package/src/commands/demo.ts +175 -0
  242. package/src/commands/export.ts +297 -0
  243. package/src/commands/history.ts +425 -0
  244. package/src/commands/template.ts +385 -0
  245. package/src/commands/validate-map.ts +324 -0
  246. package/src/commands/whatif.ts +272 -0
  247. package/src/comparison.ts +283 -0
  248. package/src/config.ts +188 -0
  249. package/src/connectors/helicone.ts +164 -0
  250. package/src/connectors/index.ts +93 -0
  251. package/src/connectors/langsmith.ts +179 -0
  252. package/src/connectors/types.ts +180 -0
  253. package/src/cost-estimator.ts +146 -0
  254. package/src/costs.ts +347 -0
  255. package/src/counterfactuals.ts +516 -0
  256. package/src/enhancement-prompts.ts +118 -0
  257. package/src/envelopes.ts +814 -0
  258. package/src/format-normalizer.ts +1486 -0
  259. package/src/history.ts +400 -0
  260. package/src/html.ts +512 -0
  261. package/src/impact.ts +522 -0
  262. package/src/index.ts +83 -0
  263. package/src/insights.ts +341 -0
  264. package/src/joiner.ts +289 -0
  265. package/src/orchestrator.ts +1015 -0
  266. package/src/pdf.ts +110 -0
  267. package/src/prediction.ts +392 -0
  268. package/src/prompts/loader.ts +88 -0
  269. package/src/renderer.ts +1045 -0
  270. package/src/runid.ts +261 -0
  271. package/src/runtime.ts +450 -0
  272. package/src/scanner.ts +508 -0
  273. package/src/templates.ts +561 -0
  274. package/src/tools/index.ts +214 -0
  275. package/src/types.ts +873 -0
  276. package/src/version.ts +24 -0
  277. package/templates/context-accumulation.yaml +23 -0
  278. package/templates/cost-concentration.yaml +20 -0
  279. package/templates/dead-code.yaml +20 -0
  280. package/templates/latency-explainer.yaml +23 -0
  281. package/templates/optimizations/ab-testing-framework.yaml +74 -0
  282. package/templates/optimizations/api-gateway-optimization.yaml +81 -0
  283. package/templates/optimizations/api-model-routing-strategy.yaml +126 -0
  284. package/templates/optimizations/auto-scaling-optimization.yaml +85 -0
  285. package/templates/optimizations/batch-utilization-diagnostic.yaml +142 -0
  286. package/templates/optimizations/comprehensive-apm.yaml +76 -0
  287. package/templates/optimizations/context-window-optimization.yaml +91 -0
  288. package/templates/optimizations/cost-sensitive-batch-processing.yaml +77 -0
  289. package/templates/optimizations/distributed-training-optimization.yaml +77 -0
  290. package/templates/optimizations/document-analysis-edge.yaml +77 -0
  291. package/templates/optimizations/document-pipeline-optimization.yaml +78 -0
  292. package/templates/optimizations/domain-specific-distillation.yaml +78 -0
  293. package/templates/optimizations/error-handling-optimization.yaml +76 -0
  294. package/templates/optimizations/gptq-4bit-quantization.yaml +96 -0
  295. package/templates/optimizations/long-context-memory-management.yaml +78 -0
  296. package/templates/optimizations/max-tokens-optimization.yaml +76 -0
  297. package/templates/optimizations/memory-bandwidth-optimization.yaml +73 -0
  298. package/templates/optimizations/multi-framework-resilience.yaml +75 -0
  299. package/templates/optimizations/multi-tenant-optimization.yaml +75 -0
  300. package/templates/optimizations/prompt-caching-optimization.yaml +143 -0
  301. package/templates/optimizations/pytorch-to-onnx-migration.yaml +109 -0
  302. package/templates/optimizations/quality-monitoring.yaml +74 -0
  303. package/templates/optimizations/realtime-budget-controls.yaml +74 -0
  304. package/templates/optimizations/realtime-latency-optimization.yaml +74 -0
  305. package/templates/optimizations/sglang-concurrency-optimization.yaml +78 -0
  306. package/templates/optimizations/smart-model-routing.yaml +96 -0
  307. package/templates/optimizations/streaming-batch-selection.yaml +167 -0
  308. package/templates/optimizations/system-prompt-optimization.yaml +75 -0
  309. package/templates/optimizations/tensorrt-llm-performance.yaml +77 -0
  310. package/templates/optimizations/vllm-high-throughput-optimization.yaml +93 -0
  311. package/templates/optimizations/vllm-migration-memory-bound.yaml +78 -0
  312. package/templates/overpowered-extraction.yaml +32 -0
  313. package/templates/overpowered-model.yaml +31 -0
  314. package/templates/prompt-bloat.yaml +24 -0
  315. package/templates/retry-explosion.yaml +28 -0
  316. package/templates/schema/insight.schema.json +113 -0
  317. package/templates/schema/optimization.schema.json +180 -0
  318. package/templates/streaming-drift.yaml +30 -0
  319. package/templates/throughput-gap.yaml +21 -0
  320. package/templates/token-underutilization.yaml +28 -0
  321. package/templates/untested-fallback.yaml +21 -0
  322. package/tests/accuracy/drift-detection.test.ts +184 -0
  323. package/tests/accuracy/false-positives.test.ts +166 -0
  324. package/tests/accuracy/templates.test.ts +205 -0
  325. package/tests/action/commands.test.ts +125 -0
  326. package/tests/action/comments.test.ts +347 -0
  327. package/tests/cli.test.ts +203 -0
  328. package/tests/comparison.test.ts +309 -0
  329. package/tests/correlation-analyzer.test.ts +534 -0
  330. package/tests/counterfactuals.test.ts +347 -0
  331. package/tests/fixtures/events/missing-id.jsonl +1 -0
  332. package/tests/fixtures/events/missing-input.jsonl +1 -0
  333. package/tests/fixtures/events/missing-latency.jsonl +1 -0
  334. package/tests/fixtures/events/missing-model.jsonl +1 -0
  335. package/tests/fixtures/events/missing-output.jsonl +1 -0
  336. package/tests/fixtures/events/missing-provider.jsonl +1 -0
  337. package/tests/fixtures/events/missing-ts.jsonl +1 -0
  338. package/tests/fixtures/events/valid.csv +3 -0
  339. package/tests/fixtures/events/valid.json +1 -0
  340. package/tests/fixtures/events/valid.jsonl +2 -0
  341. package/tests/fixtures/events/with-callsite.jsonl +1 -0
  342. package/tests/fixtures/events/with-intent.jsonl +1 -0
  343. package/tests/fixtures/events/wrong-type.jsonl +1 -0
  344. package/tests/fixtures/repos/empty/.gitkeep +0 -0
  345. package/tests/fixtures/repos/hybrid-router/router.py +35 -0
  346. package/tests/fixtures/repos/saas-anthropic/agent.ts +27 -0
  347. package/tests/fixtures/repos/saas-openai/assistant.js +33 -0
  348. package/tests/fixtures/repos/saas-openai/client.py +26 -0
  349. package/tests/fixtures/repos/self-hosted-vllm/inference.py +22 -0
  350. package/tests/github-action.test.ts +292 -0
  351. package/tests/insights.test.ts +878 -0
  352. package/tests/joiner.test.ts +168 -0
  353. package/tests/performance/action-latency.test.ts +132 -0
  354. package/tests/performance/benchmark.test.ts +189 -0
  355. package/tests/performance/cli-latency.test.ts +102 -0
  356. package/tests/pr-comment.test.ts +313 -0
  357. package/tests/prediction.test.ts +296 -0
  358. package/tests/runtime-analyzer.test.ts +375 -0
  359. package/tests/runtime.test.ts +205 -0
  360. package/tests/scanner.test.ts +122 -0
  361. package/tests/template-conformance.test.ts +526 -0
  362. package/tests/unit/cost-calculator.test.ts +303 -0
  363. package/tests/unit/credits.test.ts +180 -0
  364. package/tests/unit/inference-map.test.ts +276 -0
  365. package/tests/unit/schema.test.ts +300 -0
  366. package/tsconfig.json +20 -0
  367. package/vitest.config.ts +14 -0
@@ -0,0 +1,218 @@
1
+ import { mkdirSync, writeFileSync, existsSync, readFileSync, symlinkSync, unlinkSync } from 'fs';
2
+ import { join, relative } from 'path';
3
+ import type { InferenceMap, Insight, JoinedOutput, RuntimeSummary } from './types.js';
4
+ import { generateRunId, getRunDir, createManifest, canResume, loadCachedArtifacts, type RunInputs, type RunManifest } from './runid.js';
5
+
6
+ // =============================================================================
7
+ // CONSTANTS
8
+ // =============================================================================
9
+
10
/** Default directory name that artifact helpers in this module read from and write to. */
const OUTPUT_DIR = '.peakinfer';
11
+
12
+ // =============================================================================
13
+ // TYPES
14
+ // =============================================================================
15
+
16
/**
 * Bundle of analysis outputs that can be persisted by `saveArtifacts`.
 * Every member is optional; only the members that are present get written.
 */
export interface ArtifactData {
  /** Written as `inferencemap.json`. */
  inferenceMap?: InferenceMap;
  /** Written as `insights.json`. */
  insights?: Insight[];
  /** Written as `joined.json`. */
  joined?: JoinedOutput;
  /** Written as `runtime.json`. */
  runtime?: RuntimeSummary;
  /** Rendered HTML report, written verbatim as the report file. */
  html?: string;
}

/** Optional settings that control where and how `saveArtifacts` writes. */
export interface SaveOptions {
  /** When set, artifacts go into the run directory resolved for this ID. */
  runId?: string;
  /** Inputs recorded in the run manifest (a manifest is only written with a `runId`). */
  inputs?: RunInputs;
  /** Used to build a human-friendly HTML report file name. */
  projectName?: string;
}
29
+
30
+ // =============================================================================
31
+ // HELPERS
32
+ // =============================================================================
33
+
34
/**
 * Convert a project name into a URL/file-safe slug.
 *
 * The name is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single underscore, and the result is limited to 50 characters with no
 * leading or trailing underscore.
 *
 * @param name - Free-form project name.
 * @returns The slug; an empty string when `name` has no alphanumeric characters.
 */
function toSlug(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '_') // Collapse every non-alphanumeric run into one underscore
    .replace(/^_+/, '') // Drop leading underscores before measuring length
    .substring(0, 50) // Limit length
    .replace(/_+$/, ''); // Trim AFTER truncating: the cut may land right after a separator
}
45
+
46
/**
 * Make sure `dir` exists, creating it (and any missing parents) when it does not.
 * Nothing happens when the path already exists.
 */
function ensureDir(dir: string): void {
  if (existsSync(dir)) {
    return;
  }
  mkdirSync(dir, { recursive: true });
}
51
+
52
/**
 * Serialize `data` as 2-space-indented JSON and write it to `filePath` as UTF-8.
 */
function writeJSON(filePath: string, data: unknown): void {
  const serialized = JSON.stringify(data, null, 2);
  writeFileSync(filePath, serialized, 'utf-8');
}
55
+
56
/**
 * Point `<baseDir>/latest` at the run directory of `runId`.
 *
 * The link target is the relative path `runs/<runId>`. If the symlink cannot
 * be created (e.g. Windows without the required privilege), a plain text file
 * containing the run ID is written at the same path instead.
 *
 * @param baseDir - Directory that holds the `latest` entry.
 * @param runId - Run identifier the entry should refer to.
 */
function updateLatestSymlink(baseDir: string, runId: string): void {
  const latestPath = join(baseDir, 'latest');
  const targetPath = join('runs', runId);

  try {
    // Remove any previous entry. Unlink unconditionally instead of guarding with
    // existsSync(): existsSync follows symlinks and reports `false` for a
    // dangling link (its run directory was deleted), which would leave the stale
    // link in place and make symlinkSync fail with EEXIST.
    try {
      unlinkSync(latestPath);
    } catch (error) {
      if ((error as { code?: string }).code !== 'ENOENT') {
        throw error;
      }
    }
    // Create new symlink
    symlinkSync(targetPath, latestPath);
  } catch {
    // Symlinks may not work on all systems (e.g., Windows without admin)
    // Fallback: write a text file with the run ID
    writeFileSync(latestPath, runId, 'utf-8');
  }
}
73
+
74
+ // =============================================================================
75
+ // PUBLIC API
76
+ // =============================================================================
77
+
78
/**
 * Persist analysis artifacts to disk.
 *
 * Without `options.runId` everything is written directly into `outputDir`.
 * With a `runId`, artifacts are written into the run directory resolved by
 * `getRunDir`, and then copied to `outputDir` as well for backward
 * compatibility. When both `runId` and `inputs` are given, a `manifest.json`
 * is written to the run directory and the `latest` pointer is refreshed.
 *
 * @param data - Artifacts to write; absent (falsy) members are skipped.
 * @param outputDir - Base output directory.
 * @param options - Run ID, run inputs and project name for report naming.
 * @returns Paths of the artifact files written to the run directory. The
 *          manifest and the root-level compatibility copies are not included.
 */
export function saveArtifacts(
  data: ArtifactData,
  outputDir: string = OUTPUT_DIR,
  options: SaveOptions = {}
): string[] {
  const { runId, inputs, projectName } = options;

  // Human-friendly report name when a project name is available.
  const reportFileName = projectName
    ? `${toSlug(projectName)}_peakinfer_report.html`
    : 'report.html';

  // JSON artifacts in the order they are written.
  const jsonArtifacts: Array<[fileName: string, payload: unknown]> = [
    ['inferencemap.json', data.inferenceMap],
    ['insights.json', data.insights],
    ['joined.json', data.joined],
    ['runtime.json', data.runtime],
  ];

  // Writes every present artifact into `dir` and reports the file names written.
  const writeInto = (dir: string): string[] => {
    const written: string[] = [];
    for (const [fileName, payload] of jsonArtifacts) {
      if (!payload) continue;
      writeJSON(join(dir, fileName), payload);
      written.push(fileName);
    }
    if (data.html) {
      writeFileSync(join(dir, reportFileName), data.html, 'utf-8');
      written.push(reportFileName);
    }
    return written;
  };

  const runDir = runId ? getRunDir(outputDir, runId) : outputDir;
  ensureDir(runDir);

  const artifactNames = writeInto(runDir);
  const savedFiles = artifactNames.map((fileName) => join(runDir, fileName));

  // Manifest + "latest" pointer need both the run ID and its inputs.
  if (runId && inputs) {
    const manifest = createManifest(runId, inputs, artifactNames, 'complete');
    writeJSON(join(runDir, 'manifest.json'), manifest);
    updateLatestSymlink(outputDir, runId);
  }

  // Root-level copies for backward compatibility.
  if (runId && runDir !== outputDir) {
    ensureDir(outputDir);
    writeInto(outputDir);
  }

  return savedFiles;
}
175
+
176
/**
 * Expose the default artifact directory used by this module.
 *
 * @returns The value of `OUTPUT_DIR`.
 */
export function getOutputDir(): string {
  const defaultDir: string = OUTPUT_DIR;
  return defaultDir;
}
182
+
183
/**
 * Report whether a previous run left artifacts behind.
 *
 * The presence of `inferencemap.json` directly inside `outputDir` is used as
 * the marker.
 */
export function artifactsExist(outputDir: string = OUTPUT_DIR): boolean {
  const markerFile = join(outputDir, 'inferencemap.json');
  return existsSync(markerFile);
}
189
+
190
/**
 * Work out whether a run for `inputs` can reuse cached artifacts.
 *
 * The run ID is derived from the inputs with `generateRunId`, the run
 * directory is resolved with `getRunDir`, and `canResume` makes the decision.
 *
 * @param inputs - Inputs that identify the run.
 * @param outputDir - Base output directory.
 * @returns The resume verdict together with the derived run ID and directory.
 */
export function checkResumable(inputs: RunInputs, outputDir: string = OUTPUT_DIR): {
  canResume: boolean;
  runId: string;
  runDir: string;
} {
  const runId = generateRunId(inputs);
  const runDir = getRunDir(outputDir, runId);
  const resumable = canResume(runDir, inputs);

  return { canResume: resumable, runId, runDir };
}
207
+
208
/**
 * Read back the artifacts stored in a run directory.
 *
 * Delegates to `loadCachedArtifacts`.
 *
 * @param runDir - Directory of the run to load.
 */
export function loadArtifacts(runDir: string): ArtifactData {
  const cached: ArtifactData = loadCachedArtifacts(runDir);
  return cached;
}
214
+
215
+ /**
216
+ * Get a new run ID for given inputs
217
+ */
218
+ export { generateRunId } from './runid.js';
@@ -0,0 +1,309 @@
1
+ /**
2
+ * InferenceMAX Benchmark Integration
3
+ *
4
+ * Provides benchmark comparison for LLM inference performance.
5
+ * Data sourced from the InferenceMAX benchmark suite.
6
+ */
7
+
8
+ import { readFileSync } from 'fs';
9
+ import { join, dirname } from 'path';
10
+ import { fileURLToPath } from 'url';
11
+
12
+ // Get the directory of the current module
13
+ const __filename = fileURLToPath(import.meta.url);
14
+ const __dirname = dirname(__filename);
15
+
16
/** Reference numbers recorded for one benchmark entry. */
interface BenchmarkMetrics {
  ttft_ms: number;
  p50_latency_ms: number;
  p95_latency_ms: number;
  p99_latency_ms: number;
  throughput_tps: number;
  cost_per_1k_input: number;
  cost_per_1k_output: number;
}

/** One benchmark record: which model/stack was measured and what it achieved. */
interface BenchmarkEntry {
  model: string;
  provider: string;
  framework: string;
  hardware: string;
  metrics: BenchmarkMetrics;
  optimal_config?: Record<string, unknown>;
  notes?: string;
}

/** Shape of the benchmark JSON document loaded by this module. */
interface BenchmarkData {
  version: string;
  last_updated: string;
  source: string;
  /** Entries keyed by lookup key; lookups use `<model>:<framework>:<hardware>`. */
  benchmarks: Record<string, BenchmarkEntry>;
  /** Maps a normalized model name to a key of `benchmarks`. */
  model_aliases: Record<string, string>;
}

/** A single measured-vs-benchmark difference. */
interface BenchmarkGap {
  /** User value minus benchmark value, in the metric's own unit. */
  value: number;
  /** `value` relative to the benchmark, as a rounded percentage. */
  percent: number;
  /** Human-readable summary of the gap. */
  description: string;
}

/** Result of comparing the metrics of one inference point against a benchmark. */
export interface BenchmarkComparison {
  pointId: string;
  model: string;
  framework: string;
  hardware: string;
  /** The metrics supplied by the caller, echoed back. */
  your_metrics: {
    p95_latency_ms?: number;
    ttft_ms?: number;
    throughput_tps?: number;
  };
  benchmark_metrics: BenchmarkMetrics;
  /** Only metrics available on both sides have an entry. */
  gaps: {
    p95_latency?: BenchmarkGap;
    ttft?: BenchmarkGap;
    throughput?: BenchmarkGap;
  };
  /** One-line summary across all gaps. */
  overall_gap: string;
  optimal_config?: Record<string, unknown>;
}
63
+
64
/** Parsed benchmark document, cached after the first successful load. */
let benchmarkData: BenchmarkData | null = null;

/**
 * Return the benchmark document, reading `../../data/inferencemax.json`
 * (relative to this module's directory) on first use and serving the cached
 * copy afterwards.
 *
 * @throws Error prefixed with "Failed to load benchmark data:" when the file
 *         cannot be read or parsed.
 */
function loadBenchmarks(): BenchmarkData {
  if (benchmarkData) {
    return benchmarkData;
  }

  let parsed: BenchmarkData;
  try {
    const raw = readFileSync(join(__dirname, '../../data/inferencemax.json'), 'utf-8');
    parsed = JSON.parse(raw) as BenchmarkData;
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    throw new Error(`Failed to load benchmark data: ${reason}`);
  }

  benchmarkData = parsed;
  return parsed;
}
81
+
82
/**
 * Canonicalize a model name for lookups: lower-case it, turn every run of
 * underscores, whitespace and hyphens into a single hyphen, and strip a
 * hyphen at either end.
 */
function normalizeModel(model: string): string {
  const hyphenated = model.toLowerCase().replace(/[_\s-]+/g, '-');
  return hyphenated.replace(/^-|-$/g, '');
}
92
+
93
/**
 * Find the benchmark entry that best matches a model.
 *
 * Lookup order:
 *   1. exact key `<model>:<framework>:<hardware>`
 *   2. model alias
 *   3. default key `<model>:api:api`
 *   4. partial name match (one normalized name contains the other), choosing
 *      the candidate whose name length is closest to the requested one
 *
 * @param model - Model name in any casing/separator style.
 * @param framework - Serving framework; defaults to `'api'`.
 * @param hardware - Hardware identifier; defaults to `'api'`.
 * @returns The matching entry, or `null` when nothing matches or the model
 *          name is empty after normalization.
 */
export function getBenchmark(
  model: string,
  framework = 'api',
  hardware = 'api'
): BenchmarkEntry | null {
  const data = loadBenchmarks();
  const normalizedModel = normalizeModel(model);

  // An empty name is contained in every string, so the partial match below
  // would hand back an arbitrary benchmark. Treat it as "no match".
  if (normalizedModel === '') {
    return null;
  }

  // Try exact key match first
  const exactMatch = data.benchmarks[`${normalizedModel}:${framework}:${hardware}`];
  if (exactMatch) {
    return exactMatch;
  }

  // Try alias lookup
  const alias = data.model_aliases[normalizedModel];
  if (alias && data.benchmarks[alias]) {
    return data.benchmarks[alias];
  }

  // Try model name with default framework:hardware
  const defaultMatch = data.benchmarks[`${normalizedModel}:api:api`];
  if (defaultMatch) {
    return defaultMatch;
  }

  // Try partial model name matches. Several entries can match (e.g. both
  // "gpt-4" and "gpt-4o" are contained in "gpt-4o-mini"); prefer the one whose
  // name is closest in length rather than whichever is iterated first.
  let closest: BenchmarkEntry | null = null;
  let closestDistance = Number.POSITIVE_INFINITY;
  for (const entry of Object.values(data.benchmarks)) {
    const benchModel = normalizeModel(entry.model);
    if (benchModel === '') continue; // would match every request
    const overlaps =
      benchModel.includes(normalizedModel) || normalizedModel.includes(benchModel);
    if (!overlaps) continue;

    const distance = Math.abs(benchModel.length - normalizedModel.length);
    if (distance < closestDistance) {
      closest = entry;
      closestDistance = distance;
    }
  }

  return closest;
}
132
+
133
/**
 * Compare measured metrics of one inference point with the benchmark found
 * for its model.
 *
 * A gap is produced only for metrics that are truthy on both sides. Latency
 * and TTFT gaps are `user - benchmark` (positive means slower). The throughput
 * gap is also reported as `user - benchmark`, so a negative value means the
 * user is below the benchmark.
 *
 * @param pointId - Identifier echoed back in the result.
 * @param model - Model name used for the benchmark lookup.
 * @param userMetrics - Measured values; any of them may be omitted.
 * @param framework - Serving framework for the lookup; defaults to `'api'`.
 * @param hardware - Hardware for the lookup; defaults to `'api'`.
 * @returns The comparison, or `null` when no benchmark is available.
 */
export function compareToBenchmark(
  pointId: string,
  model: string,
  userMetrics: {
    p95_latency_ms?: number;
    ttft_ms?: number;
    throughput_tps?: number;
  },
  framework = 'api',
  hardware = 'api'
): BenchmarkComparison | null {
  const benchmark = getBenchmark(model, framework, hardware);
  if (!benchmark) {
    return null;
  }

  const reference = benchmark.metrics;
  const gaps: BenchmarkComparison['gaps'] = {};

  // Latency-style metrics: lower is better, so a positive delta is "slower".
  const latencyGap = (observed: number, baseline: number) => {
    const delta = observed - baseline;
    const pct = Math.round((delta / baseline) * 100);
    return {
      value: delta,
      percent: pct,
      description: formatGapDescription(delta, pct, 'ms', 'slower', 'faster'),
    };
  };

  if (userMetrics.p95_latency_ms && reference.p95_latency_ms) {
    gaps.p95_latency = latencyGap(userMetrics.p95_latency_ms, reference.p95_latency_ms);
  }

  if (userMetrics.ttft_ms && reference.ttft_ms) {
    gaps.ttft = latencyGap(userMetrics.ttft_ms, reference.ttft_ms);
  }

  // Throughput: higher is better, so the shortfall is measured from the benchmark.
  if (userMetrics.throughput_tps && reference.throughput_tps) {
    const shortfall = reference.throughput_tps - userMetrics.throughput_tps;
    const shortfallPct = Math.round((shortfall / reference.throughput_tps) * 100);
    gaps.throughput = {
      value: -shortfall, // Stored as user - benchmark: negative means below benchmark
      percent: -shortfallPct,
      description: formatGapDescription(shortfall, shortfallPct, 'tps', 'below', 'above'),
    };
  }

  return {
    pointId,
    model: benchmark.model,
    framework: benchmark.framework,
    hardware: benchmark.hardware,
    your_metrics: userMetrics,
    benchmark_metrics: reference,
    gaps,
    overall_gap: calculateOverallGap(gaps),
    optimal_config: benchmark.optimal_config,
  };
}
200
+
201
/**
 * Describe a gap against the benchmark in words.
 *
 * @param diff - Signed difference; a positive value is the "worse" direction.
 * @param percent - `diff` as a rounded percentage of the benchmark value.
 * @param unit - Unit suffix appended to the absolute difference (e.g. `ms`).
 * @param worseWord - Word used when `diff` is positive (e.g. `slower`).
 * @param betterWord - Word used when `diff` is negative (e.g. `faster`).
 * @returns `'On par with benchmark'` when either number is zero, an `N.Nx`
 *          multiplier when the gap exceeds 100%, otherwise
 *          `P% <word> (<difference><unit>)`.
 */
function formatGapDescription(
  diff: number,
  percent: number,
  unit: string,
  worseWord: string,
  betterWord: string
): string {
  if (diff === 0 || percent === 0) {
    return 'On par with benchmark';
  }

  const absPercent = Math.abs(percent);
  const word = diff > 0 ? worseWord : betterWord;

  if (absPercent > 100) {
    const multiplier = (absPercent / 100 + 1).toFixed(1);
    return `${multiplier}x ${word}`;
  }

  // Round for display: subtracting floats upstream can leave noise such as
  // 0.30000000000000004, which must not leak into user-facing text.
  const absDiff = Number(Math.abs(diff).toFixed(2));

  return `${absPercent}% ${word} (${diff > 0 ? '+' : ''}${absDiff}${unit})`;
}
223
+
224
/**
 * Summarize the individual gaps in one line.
 *
 * A metric is flagged when P95 latency or TTFT is more than 50% above the
 * benchmark, or throughput is more than 30% below it. Flagged metrics are
 * joined with `', '`. When nothing is flagged the result says whether there
 * was anything to compare at all.
 */
function calculateOverallGap(gaps: BenchmarkComparison['gaps']): string {
  const { p95_latency: latency, ttft, throughput } = gaps;

  const issues = [
    latency && latency.percent > 50 ? `latency ${latency.description}` : null,
    ttft && ttft.percent > 50 ? `TTFT ${ttft.description}` : null,
    throughput && throughput.percent < -30 ? `throughput ${throughput.description}` : null,
  ].filter((issue): issue is string => issue !== null);

  if (issues.length > 0) {
    return issues.join(', ');
  }

  return Object.keys(gaps).length === 0
    ? 'No metrics to compare'
    : 'Performing within benchmark range';
}
248
+
249
/**
 * List every benchmark entry in the loaded benchmark document.
 */
export function listBenchmarks(): BenchmarkEntry[] {
  const { benchmarks } = loadBenchmarks();
  return Object.values(benchmarks);
}
256
+
257
/**
 * Report the version and last-updated stamp of the loaded benchmark document.
 */
export function getBenchmarkVersion(): { version: string; lastUpdated: string } {
  const { version, last_updated: lastUpdated } = loadBenchmarks();
  return { version, lastUpdated };
}
267
+
268
/**
 * Tell whether `getBenchmark` finds an entry for `model` using the default
 * framework and hardware.
 */
export function hasBenchmark(model: string): boolean {
  const entry = getBenchmark(model);
  return entry !== null;
}
274
+
275
/**
 * Render a benchmark comparison as plain multi-line text.
 *
 * The output lists model/framework/hardware, one line per available gap, the
 * overall summary and, when present, the benchmark's optimal configuration.
 *
 * @param comparison - Comparison produced by `compareToBenchmark`.
 * @returns Lines joined with `\n`.
 */
export function formatBenchmarkComparison(comparison: BenchmarkComparison): string {
  const { your_metrics: yours, benchmark_metrics: bench, gaps } = comparison;
  const lines: string[] = [];

  lines.push(`Model: ${comparison.model}`);
  lines.push(`Framework: ${comparison.framework} | Hardware: ${comparison.hardware}`);
  lines.push('');

  if (gaps.p95_latency) {
    lines.push(`P95 Latency: Your ${yours.p95_latency_ms}ms | Benchmark ${bench.p95_latency_ms}ms | ${gaps.p95_latency.description}`);
  }

  if (gaps.ttft) {
    lines.push(`TTFT: Your ${yours.ttft_ms}ms | Benchmark ${bench.ttft_ms}ms | ${gaps.ttft.description}`);
  }

  if (gaps.throughput) {
    lines.push(`Throughput: Your ${yours.throughput_tps} tps | Benchmark ${bench.throughput_tps} tps | ${gaps.throughput.description}`);
  }

  lines.push('');
  lines.push(`Overall: ${comparison.overall_gap}`);

  if (comparison.optimal_config) {
    lines.push('');
    lines.push('Optimal Config:');
    for (const [key, value] of Object.entries(comparison.optimal_config)) {
      // Config values are `unknown`: nested objects/arrays would otherwise be
      // printed as "[object Object]" (or a bracket-less list), so JSON-encode them.
      const rendered =
        typeof value === 'object' && value !== null ? JSON.stringify(value) : String(value);
      lines.push(` ${key}: ${rendered}`);
    }
  }

  return lines.join('\n');
}