waypoi 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260)
  1. package/.github/instructions/ui.instructions.md +42 -0
  2. package/.github/workflows/ci.yml +35 -0
  3. package/.github/workflows/publish.yml +71 -0
  4. package/.github/workflows/release.yml +48 -0
  5. package/.playwright-mcp/console-2026-04-04T01-41-10-746Z.log +2 -0
  6. package/.playwright-mcp/console-2026-04-04T01-41-28-799Z.log +3 -0
  7. package/.playwright-mcp/console-2026-04-05T02-26-51-909Z.log +76 -0
  8. package/.playwright-mcp/page-2026-04-04T01-41-10-816Z.yml +1 -0
  9. package/.playwright-mcp/page-2026-04-04T01-41-29-141Z.yml +77 -0
  10. package/.playwright-mcp/page-2026-04-04T01-41-42-633Z.yml +190 -0
  11. package/.playwright-mcp/page-2026-04-04T01-42-03-929Z.yml +262 -0
  12. package/.playwright-mcp/page-2026-04-04T02-12-54-813Z.yml +6 -0
  13. package/.playwright-mcp/page-2026-04-04T02-14-58-600Z.yml +190 -0
  14. package/.playwright-mcp/page-2026-04-04T02-15-03-923Z.yml +190 -0
  15. package/.playwright-mcp/page-2026-04-04T02-15-07-426Z.yml +190 -0
  16. package/.playwright-mcp/page-2026-04-04T02-15-25-729Z.yml +262 -0
  17. package/.playwright-mcp/page-2026-04-04T02-16-22-984Z.yml +262 -0
  18. package/.playwright-mcp/page-2026-04-04T02-17-00-599Z.yml +190 -0
  19. package/.playwright-mcp/page-2026-04-04T02-17-50-874Z.yml +190 -0
  20. package/.playwright-mcp/page-2026-04-05T02-26-55-570Z.yml +6 -0
  21. package/AGENTS.md +48 -0
  22. package/CHANGELOG.md +131 -0
  23. package/README.md +552 -0
  24. package/assets/agent-mode.png +0 -0
  25. package/assets/categorize.png +0 -0
  26. package/assets/dashboard.png +0 -0
  27. package/assets/endpoint-proxy.png +0 -0
  28. package/assets/icon.png +0 -0
  29. package/assets/mcp-generate-image.png +0 -0
  30. package/assets/mcp-understand-image.png +0 -0
  31. package/assets/peek-token-flow.png +0 -0
  32. package/assets/playground.png +0 -0
  33. package/assets/sankey.png +0 -0
  34. package/cli/index.ts +2805 -0
  35. package/cli/legacyRewrite.ts +108 -0
  36. package/cli/modelRef.ts +24 -0
  37. package/dist/cli/index.js +2536 -0
  38. package/dist/cli/legacyRewrite.js +92 -0
  39. package/dist/cli/modelRef.js +20 -0
  40. package/dist/src/benchmark/artifacts.js +131 -0
  41. package/dist/src/benchmark/capabilityClassifier.js +81 -0
  42. package/dist/src/benchmark/capabilityStore.js +144 -0
  43. package/dist/src/benchmark/config.js +238 -0
  44. package/dist/src/benchmark/gates.js +118 -0
  45. package/dist/src/benchmark/jobs.js +252 -0
  46. package/dist/src/benchmark/runner.js +1847 -0
  47. package/dist/src/benchmark/schema.js +353 -0
  48. package/dist/src/benchmark/suites.js +314 -0
  49. package/dist/src/benchmark/tinyQaDataset.js +422 -0
  50. package/dist/src/benchmark/types.js +25 -0
  51. package/dist/src/config.js +47 -0
  52. package/dist/src/index.js +178 -0
  53. package/dist/src/mcp/client.js +215 -0
  54. package/dist/src/mcp/discovery.js +226 -0
  55. package/dist/src/mcp/policy.js +65 -0
  56. package/dist/src/mcp/registry.js +129 -0
  57. package/dist/src/mcp/service.js +460 -0
  58. package/dist/src/middleware/auth.js +179 -0
  59. package/dist/src/middleware/requestCapture.js +192 -0
  60. package/dist/src/middleware/requestStats.js +118 -0
  61. package/dist/src/pools/builder.js +132 -0
  62. package/dist/src/pools/repository.js +69 -0
  63. package/dist/src/pools/scheduler.js +360 -0
  64. package/dist/src/pools/types.js +2 -0
  65. package/dist/src/protocols/adapters/dashscope.js +267 -0
  66. package/dist/src/protocols/adapters/inferenceV2.js +346 -0
  67. package/dist/src/protocols/adapters/openai.js +27 -0
  68. package/dist/src/protocols/registry.js +99 -0
  69. package/dist/src/protocols/types.js +2 -0
  70. package/dist/src/providers/health.js +153 -0
  71. package/dist/src/providers/importer.js +289 -0
  72. package/dist/src/providers/modelRegistry.js +313 -0
  73. package/dist/src/providers/repository.js +361 -0
  74. package/dist/src/providers/types.js +2 -0
  75. package/dist/src/routes/admin.js +531 -0
  76. package/dist/src/routes/audio.js +295 -0
  77. package/dist/src/routes/chat.js +240 -0
  78. package/dist/src/routes/embeddings.js +157 -0
  79. package/dist/src/routes/images.js +288 -0
  80. package/dist/src/routes/mcp.js +256 -0
  81. package/dist/src/routes/mcpService.js +100 -0
  82. package/dist/src/routes/models.js +48 -0
  83. package/dist/src/routes/responses.js +711 -0
  84. package/dist/src/routes/sessions.js +450 -0
  85. package/dist/src/routes/stats.js +270 -0
  86. package/dist/src/routes/ui.js +97 -0
  87. package/dist/src/routes/videos.js +107 -0
  88. package/dist/src/routing/router.js +338 -0
  89. package/dist/src/services/imageGeneration.js +280 -0
  90. package/dist/src/services/imageUnderstanding.js +352 -0
  91. package/dist/src/services/videoGeneration.js +79 -0
  92. package/dist/src/storage/captureRepository.js +1591 -0
  93. package/dist/src/storage/files.js +157 -0
  94. package/dist/src/storage/imageCache.js +346 -0
  95. package/dist/src/storage/repositories.js +388 -0
  96. package/dist/src/storage/sessionRepository.js +370 -0
  97. package/dist/src/storage/statsRepository.js +204 -0
  98. package/dist/src/transport/httpClient.js +126 -0
  99. package/dist/src/types.js +2 -0
  100. package/dist/src/utils/messageMedia.js +285 -0
  101. package/dist/src/utils/modelCapabilities.js +108 -0
  102. package/dist/src/utils/modelDiscovery.js +170 -0
  103. package/dist/src/version.js +5 -0
  104. package/dist/src/workers/captureRetention.js +25 -0
  105. package/dist/src/workers/configWatcher.js +91 -0
  106. package/dist/src/workers/healthChecker.js +21 -0
  107. package/dist/src/workers/statsRotation.js +41 -0
  108. package/docs/LLM/output_schema.md +312 -0
  109. package/docs/benchmark.md +208 -0
  110. package/docs/mcp-guidelines.md +125 -0
  111. package/docs/mcp-service.md +178 -0
  112. package/docs/opencode.md +86 -0
  113. package/docs/providers.md +79 -0
  114. package/examples/benchmark.config.yaml +28 -0
  115. package/examples/providers/alibaba-dashscope.yaml +88 -0
  116. package/examples/providers/alibaba-llm.yaml +64 -0
  117. package/examples/providers/alibaba-registry.yaml +7 -0
  118. package/examples/providers/inference-v2-ray.yaml +29 -0
  119. package/examples/scenarios/assets/omni-call-sample.wav +0 -0
  120. package/examples/scenarios/custom.jsonl +5 -0
  121. package/examples/scenarios/custom.yaml +40 -0
  122. package/model-form-v2.png +0 -0
  123. package/package.json +66 -0
  124. package/provider-form-v2.png +0 -0
  125. package/provider-form.png +0 -0
  126. package/scripts/manual-test.sh +11 -0
  127. package/scripts/version-from-git.js +23 -0
  128. package/src/benchmark/artifacts.ts +149 -0
  129. package/src/benchmark/capabilityClassifier.ts +99 -0
  130. package/src/benchmark/capabilityStore.ts +174 -0
  131. package/src/benchmark/config.ts +337 -0
  132. package/src/benchmark/gates.ts +164 -0
  133. package/src/benchmark/jobs.ts +312 -0
  134. package/src/benchmark/runner.ts +2519 -0
  135. package/src/benchmark/schema.ts +443 -0
  136. package/src/benchmark/suites.ts +323 -0
  137. package/src/benchmark/tinyQaDataset.ts +428 -0
  138. package/src/benchmark/types.ts +442 -0
  139. package/src/config.ts +44 -0
  140. package/src/index.ts +195 -0
  141. package/src/mcp/client.ts +305 -0
  142. package/src/mcp/discovery.ts +266 -0
  143. package/src/mcp/policy.ts +105 -0
  144. package/src/mcp/registry.ts +164 -0
  145. package/src/mcp/service.ts +611 -0
  146. package/src/middleware/auth.ts +251 -0
  147. package/src/middleware/requestCapture.ts +245 -0
  148. package/src/middleware/requestStats.ts +163 -0
  149. package/src/pools/builder.ts +159 -0
  150. package/src/pools/repository.ts +71 -0
  151. package/src/pools/scheduler.ts +425 -0
  152. package/src/pools/types.ts +117 -0
  153. package/src/protocols/adapters/dashscope.ts +335 -0
  154. package/src/protocols/adapters/inferenceV2.ts +428 -0
  155. package/src/protocols/adapters/openai.ts +32 -0
  156. package/src/protocols/registry.ts +117 -0
  157. package/src/protocols/types.ts +81 -0
  158. package/src/providers/health.ts +207 -0
  159. package/src/providers/importer.ts +402 -0
  160. package/src/providers/modelRegistry.ts +415 -0
  161. package/src/providers/repository.ts +439 -0
  162. package/src/providers/types.ts +113 -0
  163. package/src/routes/admin.ts +666 -0
  164. package/src/routes/audio.ts +372 -0
  165. package/src/routes/chat.ts +301 -0
  166. package/src/routes/embeddings.ts +197 -0
  167. package/src/routes/images.ts +356 -0
  168. package/src/routes/mcp.ts +320 -0
  169. package/src/routes/mcpService.ts +114 -0
  170. package/src/routes/models.ts +50 -0
  171. package/src/routes/responses.ts +872 -0
  172. package/src/routes/sessions.ts +558 -0
  173. package/src/routes/stats.ts +312 -0
  174. package/src/routes/ui.ts +96 -0
  175. package/src/routes/videos.ts +132 -0
  176. package/src/routing/router.ts +501 -0
  177. package/src/services/imageGeneration.ts +396 -0
  178. package/src/services/imageUnderstanding.ts +449 -0
  179. package/src/services/videoGeneration.ts +127 -0
  180. package/src/storage/captureRepository.ts +1835 -0
  181. package/src/storage/files.ts +178 -0
  182. package/src/storage/imageCache.ts +405 -0
  183. package/src/storage/repositories.ts +494 -0
  184. package/src/storage/sessionRepository.ts +419 -0
  185. package/src/storage/statsRepository.ts +238 -0
  186. package/src/transport/httpClient.ts +145 -0
  187. package/src/types.ts +322 -0
  188. package/src/utils/messageMedia.ts +293 -0
  189. package/src/utils/modelCapabilities.ts +161 -0
  190. package/src/utils/modelDiscovery.ts +203 -0
  191. package/src/workers/captureRetention.ts +25 -0
  192. package/src/workers/configWatcher.ts +115 -0
  193. package/src/workers/healthChecker.ts +22 -0
  194. package/src/workers/statsRotation.ts +49 -0
  195. package/tests/benchmarkAdminRoutes.test.ts +82 -0
  196. package/tests/benchmarkBasics.test.ts +116 -0
  197. package/tests/captureAdminRoutes.test.ts +420 -0
  198. package/tests/captureRepository.test.ts +797 -0
  199. package/tests/cliLegacyRewrite.test.ts +45 -0
  200. package/tests/imageGeneration.service.test.ts +107 -0
  201. package/tests/imageUnderstanding.service.test.ts +123 -0
  202. package/tests/mcpPolicy.test.ts +105 -0
  203. package/tests/mcpService.test.ts +1245 -0
  204. package/tests/modelRef.test.ts +23 -0
  205. package/tests/modelsRoutes.test.ts +154 -0
  206. package/tests/sessionMediaCache.test.ts +167 -0
  207. package/tests/statsRoutes.test.ts +323 -0
  208. package/tsconfig.json +15 -0
  209. package/ui/index.html +16 -0
  210. package/ui/package-lock.json +8521 -0
  211. package/ui/package.json +52 -0
  212. package/ui/postcss.config.js +6 -0
  213. package/ui/public/assets/apple-touch-icon.png +0 -0
  214. package/ui/public/assets/favicon-16.png +0 -0
  215. package/ui/public/assets/favicon-32.png +0 -0
  216. package/ui/public/assets/icon-192.png +0 -0
  217. package/ui/public/assets/icon-512.png +0 -0
  218. package/ui/src/App.tsx +27 -0
  219. package/ui/src/api/client.ts +1503 -0
  220. package/ui/src/components/EndpointUsageGuide.tsx +361 -0
  221. package/ui/src/components/Layout.tsx +124 -0
  222. package/ui/src/components/MessageContent.tsx +365 -0
  223. package/ui/src/components/ToolCallMessage.tsx +179 -0
  224. package/ui/src/components/ToolPicker.tsx +442 -0
  225. package/ui/src/components/messageContentParser.test.ts +41 -0
  226. package/ui/src/components/messageContentParser.ts +73 -0
  227. package/ui/src/components/thinkingPreview.test.ts +27 -0
  228. package/ui/src/components/thinkingPreview.ts +15 -0
  229. package/ui/src/components/toMermaidSankey.test.ts +78 -0
  230. package/ui/src/components/toMermaidSankey.ts +56 -0
  231. package/ui/src/components/ui/button.tsx +58 -0
  232. package/ui/src/components/ui/input.tsx +21 -0
  233. package/ui/src/components/ui/textarea.tsx +21 -0
  234. package/ui/src/lib/utils.ts +6 -0
  235. package/ui/src/main.tsx +9 -0
  236. package/ui/src/pages/AgentPlayground.tsx +2010 -0
  237. package/ui/src/pages/Benchmark.tsx +988 -0
  238. package/ui/src/pages/Dashboard.tsx +581 -0
  239. package/ui/src/pages/Peek.tsx +962 -0
  240. package/ui/src/pages/Settings.tsx +2013 -0
  241. package/ui/src/pages/agentPlaygroundPayload.test.ts +109 -0
  242. package/ui/src/pages/agentPlaygroundPayload.ts +97 -0
  243. package/ui/src/pages/agentThinkingContent.test.ts +50 -0
  244. package/ui/src/pages/agentThinkingContent.ts +57 -0
  245. package/ui/src/pages/dashboardTokenUsage.test.ts +66 -0
  246. package/ui/src/pages/dashboardTokenUsage.ts +36 -0
  247. package/ui/src/pages/imageUpload.test.ts +39 -0
  248. package/ui/src/pages/imageUpload.ts +71 -0
  249. package/ui/src/pages/peekFilters.test.ts +29 -0
  250. package/ui/src/pages/peekFilters.ts +13 -0
  251. package/ui/src/pages/peekMedia.test.ts +58 -0
  252. package/ui/src/pages/peekMedia.ts +148 -0
  253. package/ui/src/pages/sessionAutoTitle.test.ts +128 -0
  254. package/ui/src/pages/sessionAutoTitle.ts +106 -0
  255. package/ui/src/stores/settings.ts +58 -0
  256. package/ui/src/styles/globals.css +223 -0
  257. package/ui/src/vite-env.d.ts +8 -0
  258. package/ui/tailwind.config.js +106 -0
  259. package/ui/tsconfig.json +32 -0
  260. package/ui/vite.config.ts +37 -0
@@ -0,0 +1,449 @@
1
+ import { promises as fs } from "fs";
2
+ import path from "path";
3
+ import sharp from "sharp";
4
+ import { routeRequest } from "../routing/router";
5
+ import { selectPoolCandidates } from "../pools/scheduler";
6
+ import { pickBestProviderModelByCapabilities } from "../providers/modelRegistry";
7
+ import { StoragePaths } from "../storage/files";
8
+
9
/** Prompt sent alongside the image when the caller supplies no usable instruction. */
const DEFAULT_INSTRUCTION =
  "Analyze this image. Return OCR text, key objects, scene summary, and notable details.";

/** Pixel-area ceiling: images whose width x height exceeds this are downscaled before upload. */
const MAX_IMAGE_PIXELS = 1080 * 720 - 1;

/** Quality value handed to the JPEG and WebP encoders when a downscaled image is re-encoded. */
const RESIZE_QUALITY = 85;
13
+
14
/** Caller-supplied parameters for one image-understanding call. */
export interface ImageUnderstandingRequest {
  /** Local file to read and inline as a base64 data URL. Checked before `image_url`. */
  image_path?: string;
  /** Remote http(s) URL or base64 `data:` URL, forwarded to the model unchanged. */
  image_url?: string;
  /** Prompt for the model; a blank or missing value falls back to the default instruction. */
  instruction?: string;
  /** Model to use; when omitted a vision-capable model is selected automatically. */
  model?: string;
  /** Forwarded to the chat-completions payload only when it is a number. */
  max_tokens?: number;
  /** Forwarded to the chat-completions payload only when it is a number. */
  temperature?: number;
}
22
+
23
/** Structured view of the model's reply, as produced by `parseImageUnderstandingText`. */
export interface ImageAnalysis {
  /** Main answer text; never empty (a placeholder sentence is used when nothing was returned). */
  answer: string;
  /** Text the model reported under `ocr_text` (or `ocr` in plain-text replies); `""` when absent. */
  ocr_text: string;
  /** Entries the model listed under `objects`; empty when absent. */
  objects: string[];
  /** Value the model reported under `scene`; `""` when absent. */
  scene: string;
  /** Entries the model listed under `notable_details`; empty when absent. */
  notable_details: string[];
  /** Entries the model listed under `safety_notes`; empty when absent. */
  safety_notes: string[];
}
31
+
32
/** Dimensions of a local image before and after the optional downscale step. */
export interface ImageGeometry {
  /** Width reported by the decoder for the file as read from disk. */
  original_width: number;
  /** Height reported by the decoder for the file as read from disk. */
  original_height: number;
  /** Width of the image actually sent to the model. */
  uploaded_width: number;
  /** Height of the image actually sent to the model. */
  uploaded_height: number;
  /** `original_width / uploaded_width`: multiply an uploaded x coordinate to get the original one. */
  scale_x: number;
  /** `original_height / uploaded_height`: multiply an uploaded y coordinate to get the original one. */
  scale_y: number;
  /** True when either uploaded dimension differs from the original. */
  resized: boolean;
}
41
+
42
/** Outcome of `runImageUnderstanding`. */
export interface ImageUnderstandingResult {
  /** Model name the request was routed with (the requested one or the auto-selected one). */
  model: string;
  /** Parsed form of the assistant's reply. */
  analysis: ImageAnalysis;
  /** Assistant text exactly as extracted from the upstream response, before parsing. */
  raw_text: string;
  /** Present only when the image came from `image_path`; URL inputs carry no geometry. */
  image_geometry?: ImageGeometry;
  /** Token counts copied from the upstream `usage` object; each defaults to 0 when missing. */
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
53
+
54
/** An image source reduced to the URL that is placed in the chat message. */
interface ResolvedImageInput {
  /** Either the caller's URL or a base64 data URL built from a local file. */
  imageUrl: string;
  /** Set only for local files, where the dimensions were measured. */
  imageGeometry?: ImageGeometry;
}
58
+
59
/**
 * Sends one image plus an instruction to a vision-capable chat model and
 * returns the parsed analysis.
 *
 * The model is resolved first (explicit `input.model`, otherwise auto-selected),
 * then the image source is resolved. For local files a system message describing
 * the original and uploaded dimensions is prepended so that any coordinates the
 * model returns refer to the original image.
 *
 * @param paths  Storage locations passed through to model selection and routing.
 * @param input  Image source, optional instruction and sampling parameters.
 * @param signal Abort signal passed to the routed request.
 * @returns The model used, the parsed analysis, the raw assistant text, the
 *          image geometry (local files only) and token usage.
 * @throws A typed `no_vision_model` error when no model can be resolved; errors
 *         raised while resolving the image, routing, or reading the response
 *         propagate unchanged.
 */
export async function runImageUnderstanding(
  paths: StoragePaths,
  input: ImageUnderstandingRequest,
  signal: AbortSignal
): Promise<ImageUnderstandingResult> {
  const model = await resolveVisionTextModel(paths, input.model);
  if (!model) {
    throw typedError("no_vision_model", "No vision-capable text model available.");
  }

  const { imageUrl, imageGeometry } = await resolveImageInput(input);

  // A whitespace-only instruction counts as missing; a real one is sent untrimmed.
  let instruction = DEFAULT_INSTRUCTION;
  if (input.instruction && input.instruction.trim()) {
    instruction = input.instruction;
  }

  const userMessage: Record<string, unknown> = {
    role: "user",
    content: [
      { type: "image_url", image_url: { url: imageUrl } },
      { type: "text", text: instruction },
    ],
  };
  const messages: Array<Record<string, unknown>> = imageGeometry
    ? [{ role: "system", content: buildImageGeometrySystemMessage(imageGeometry) }, userMessage]
    : [userMessage];

  // Optional sampling parameters are only included when the caller gave numbers.
  const payload: Record<string, unknown> = {
    model,
    stream: false,
    messages,
    ...(typeof input.max_tokens === "number" ? { max_tokens: input.max_tokens } : {}),
    ...(typeof input.temperature === "number" ? { temperature: input.temperature } : {}),
  };

  const outcome = await routeRequest(
    paths,
    model,
    "/v1/chat/completions",
    payload,
    {},
    signal,
    {
      requiredInput: ["text", "image"],
      requiredOutput: ["text"],
    }
  );

  const body = await readBody(outcome.attempt.response);
  const rawText = extractAssistantText(body.payload);
  const tokenUsage = body.usage;

  return {
    model,
    analysis: parseImageUnderstandingText(rawText),
    raw_text: rawText,
    image_geometry: imageGeometry,
    usage: {
      prompt_tokens: tokenUsage?.prompt_tokens ?? 0,
      completion_tokens: tokenUsage?.completion_tokens ?? 0,
      total_tokens: tokenUsage?.total_tokens ?? 0,
    },
  };
}
125
+
126
/**
 * Resolves the request's image source and returns only the URL, discarding any
 * geometry information.
 *
 * @throws Whatever `resolveImageInput` throws for a missing or unusable source.
 */
export async function resolveImageInputToUrl(input: ImageUnderstandingRequest): Promise<string> {
  const { imageUrl } = await resolveImageInput(input);
  return imageUrl;
}
130
+
131
/**
 * Turns the request's image source into a URL suitable for a chat message.
 *
 * `image_path` is checked first and wins when both fields are set; the file is
 * read, optionally downscaled, and inlined as a data URL together with its
 * geometry. Otherwise `image_url` is accepted as-is when it is an http(s) URL
 * or a base64 `data:` URL.
 *
 * @param input Request carrying `image_path` and/or `image_url`.
 * @returns The resolved URL, plus geometry for local files.
 * @throws A typed `invalid_request` error when no source is given, or when
 *         `image_url` is present but not an accepted URL form.
 */
export async function resolveImageInput(input: ImageUnderstandingRequest): Promise<ResolvedImageInput> {
  if (input.image_path) {
    return imageDataUrlWithGeometryFromPath(input.image_path);
  }
  if (input.image_url) {
    // Report a malformed URL as such rather than claiming that no source was supplied.
    if (!isValidImageUrl(input.image_url)) {
      throw typedError(
        "invalid_request",
        "image_url must be an http(s) URL or a base64 data: URL."
      );
    }
    return { imageUrl: input.image_url };
  }
  throw typedError(
    "invalid_request",
    "Exactly one image source is required: image_path or image_url."
  );
}
143
+
144
/**
 * Reads a local image and returns it as a base64 data URL, discarding the
 * geometry that `imageDataUrlWithGeometryFromPath` also computes.
 *
 * @param imagePath Path of the image file to inline.
 */
export async function imageDataUrlFromPath(imagePath: string): Promise<string> {
  const { imageUrl } = await imageDataUrlWithGeometryFromPath(imagePath);
  return imageUrl;
}
148
+
149
/**
 * Reads a local image, downscales it when its pixel area exceeds
 * `MAX_IMAGE_PIXELS`, and returns it as a base64 data URL together with the
 * original and uploaded dimensions.
 *
 * When no resize is needed the file bytes are inlined untouched. The MIME type
 * then comes from the file extension, falling back to the format detected by
 * the decoder when the extension is missing or unknown. When a resize happens
 * the image is re-encoded as JPEG, PNG or WebP to match the source format, and
 * as PNG for every other format.
 *
 * @param imagePath Path of the image file; resolved against the current working directory.
 * @returns The data URL and the geometry describing any downscale applied.
 * @throws A typed `invalid_request` error when the file cannot be read, cannot
 *         be decoded as an image, or reports no dimensions.
 */
export async function imageDataUrlWithGeometryFromPath(
  imagePath: string
): Promise<ResolvedImageInput> {
  const abs = path.resolve(imagePath);
  let data: Buffer;
  try {
    data = await fs.readFile(abs);
  } catch {
    throw typedError("invalid_request", `image_path not readable: ${imagePath}`);
  }

  const image = sharp(data);
  let meta: sharp.Metadata;
  try {
    meta = await image.metadata();
  } catch {
    // Bytes that are not a decodable image are a caller error, not an internal failure.
    throw typedError("invalid_request", "Unable to read image dimensions.");
  }
  if (!meta.width || !meta.height) {
    throw typedError("invalid_request", "Unable to read image dimensions.");
  }

  let mimeType = mimeFromExt(abs);
  if (mimeType === "application/octet-stream" && meta.format) {
    // Extension missing or unrecognised: reuse the extension table with the detected format.
    mimeType = mimeFromExt(`image.${meta.format}`);
  }

  const originalWidth = meta.width;
  const originalHeight = meta.height;
  let uploadedWidth = originalWidth;
  let uploadedHeight = originalHeight;

  const area = originalWidth * originalHeight;
  if (area > MAX_IMAGE_PIXELS) {
    // Uniform scale that brings the area down to the ceiling; flooring keeps it at or below.
    const scale = Math.sqrt(MAX_IMAGE_PIXELS / area);
    uploadedWidth = Math.max(1, Math.floor(originalWidth * scale));
    uploadedHeight = Math.max(1, Math.floor(originalHeight * scale));

    let resized = image.resize(uploadedWidth, uploadedHeight, { fit: "fill" });
    switch ((meta.format ?? "").toLowerCase()) {
      case "jpeg":
      case "jpg":
        resized = resized.jpeg({ quality: RESIZE_QUALITY });
        mimeType = "image/jpeg";
        break;
      case "webp":
        resized = resized.webp({ quality: RESIZE_QUALITY });
        mimeType = "image/webp";
        break;
      default:
        // PNG sources and every other format are re-encoded as PNG.
        resized = resized.png();
        mimeType = "image/png";
        break;
    }
    data = await resized.toBuffer();
  }

  return {
    imageUrl: `data:${mimeType};base64,${data.toString("base64")}`,
    imageGeometry: {
      original_width: originalWidth,
      original_height: originalHeight,
      uploaded_width: uploadedWidth,
      uploaded_height: uploadedHeight,
      scale_x: originalWidth / uploadedWidth,
      scale_y: originalHeight / uploadedHeight,
      resized: originalWidth !== uploadedWidth || originalHeight !== uploadedHeight,
    },
  };
}
210
+
211
/**
 * Builds the system message telling the model to report coordinates in the
 * original image's pixel space, listing both sizes and the scale factors.
 *
 * @param imageGeometry Original/uploaded dimensions and scale factors.
 * @returns The sentences joined into one line with single spaces.
 */
export function buildImageGeometrySystemMessage(imageGeometry: ImageGeometry): string {
  const {
    original_width: originalW,
    original_height: originalH,
    uploaded_width: uploadedW,
    uploaded_height: uploadedH,
    scale_x: scaleX,
    scale_y: scaleY,
  } = imageGeometry;

  const sentences = [
    "If you return coordinates or bounding boxes, express them in the original image pixel space.",
    `Original image size: ${originalW}x${originalH}.`,
    `Uploaded image size: ${uploadedW}x${uploadedH}.`,
    `Scale factors from uploaded to original: x=${scaleX}, y=${scaleY}.`,
    "Do not return coordinates in resized-image pixels.",
  ];
  return sentences.join(" ");
}
220
+
221
/**
 * Converts the assistant's raw reply into an `ImageAnalysis`.
 *
 * A reply that is JSON (optionally inside a code fence) is mapped field by
 * field. Anything else is treated as free text: the whole trimmed reply becomes
 * `answer`, and `key: value` lines are scanned for the remaining fields.
 *
 * @param rawText Assistant text as extracted from the upstream response.
 * @returns The analysis; fields that cannot be found are empty.
 */
export function parseImageUnderstandingText(rawText: string): ImageAnalysis {
  const text = rawText.trim();

  const json = parseEmbeddedJson(text);
  if (json) {
    return fromJsonAnalysis(json, text);
  }

  const answer = text || "No textual response returned.";
  const lines = answer
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  // Value of the first key (in the order given) that has a `key: value` line, else "".
  const lookup = (...keys: string[]): string => {
    for (const key of keys) {
      const found = extractKeyValue(lines, key);
      if (found !== null) {
        return found;
      }
    }
    return "";
  };

  return {
    answer,
    ocr_text: lookup("ocr_text", "ocr"),
    objects: splitList(lookup("objects")),
    scene: lookup("scene"),
    notable_details: splitList(lookup("notable_details", "notable details")),
    safety_notes: splitList(lookup("safety_notes", "safety notes")),
  };
}
239
+
240
/**
 * Chooses the model for an image-understanding call.
 *
 * An explicitly requested model is returned untouched. Otherwise the "smart"
 * pool is used when the scheduler reports at least one candidate accepting
 * text+image input and producing text; failing that, the model registry is
 * asked for the best provider model with those capabilities.
 *
 * @param paths          Storage locations passed to the scheduler and registry.
 * @param requestedModel Model named by the caller, if any.
 * @returns The model name, or whatever the registry returns when nothing matches.
 */
async function resolveVisionTextModel(paths: StoragePaths, requestedModel?: string): Promise<string | null> {
  if (requestedModel) {
    return requestedModel;
  }

  const smartPool = await selectPoolCandidates(
    paths,
    "smart",
    {
      requiredInput: ["text", "image"],
      requiredOutput: ["text"],
    },
    {
      operation: "chat_completions",
      stream: false,
    }
  );
  const smartPoolUsable = smartPool && smartPool.candidates.length > 0;
  if (smartPoolUsable) {
    return "smart";
  }

  // No usable pool: fall back to a direct capability match among provider models.
  return pickBestProviderModelByCapabilities(
    paths,
    { requiredInput: ["text", "image"], requiredOutput: ["text"] },
    "llm"
  );
}
267
+
268
/**
 * Drains the upstream response body and parses it as JSON.
 *
 * @param response Upstream response with a readable body, headers and an optional status code.
 * @returns The parsed payload and its `usage` object, or `null` usage when the
 *          payload is not an object or carries none.
 * @throws A typed `upstream_error` when the body is not valid JSON; the message
 *         includes the status, content type and a truncated body snippet.
 */
async function readBody(response: {
  statusCode?: number;
  body: NodeJS.ReadableStream;
  headers: Record<string, string | string[]>;
}): Promise<{
  payload: unknown;
  usage: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number } | null;
}> {
  const chunks: Buffer[] = [];
  for await (const chunk of response.body) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  const rawText = Buffer.concat(chunks).toString("utf8");

  let payload: unknown;
  try {
    payload = JSON.parse(rawText);
  } catch {
    // Diagnostics are only needed on this path, so they are computed here.
    const status = typeof response.statusCode === "number" ? response.statusCode : 0;
    const contentType = normalizeContentType(response.headers);
    const snippet = summarizeBodySnippet(rawText);
    throw typedError(
      "upstream_error",
      `Expected JSON from chat completion. status=${status} content-type=${contentType || "unknown"} body=${snippet}`
    );
  }

  // JSON.parse can legitimately return null (body "null"); reading `.usage` off it would throw.
  const usage =
    payload !== null && typeof payload === "object"
      ? (payload as {
          usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
        }).usage ?? null
      : null;
  return { payload, usage };
}
298
+
299
/**
 * Pulls the assistant's text out of a chat-completions style payload.
 *
 * Only the first choice is considered. String content is returned as-is;
 * array content is reduced to its non-empty `text` parts, joined with newlines
 * and trimmed. Every other shape yields an empty string.
 *
 * @param payload Parsed upstream response of unknown shape.
 */
function extractAssistantText(payload: unknown): string {
  if (!payload || typeof payload !== "object") {
    return "";
  }
  const choices = (payload as { choices?: unknown }).choices;
  if (!Array.isArray(choices) || choices.length === 0) {
    return "";
  }

  const firstChoice = choices[0] as { message?: { content?: unknown } };
  const content = firstChoice?.message?.content;
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces: string[] = [];
  for (const part of content) {
    if (!part || typeof part !== "object") continue;
    if ((part as { type?: unknown }).type !== "text") continue;
    const text = (part as { text?: string }).text ?? "";
    if (text) {
      pieces.push(text);
    }
  }
  return pieces.join("\n").trim();
}
325
+
326
/**
 * Tries to read a JSON object out of the model's reply.
 *
 * If the text contains a Markdown code fence (with or without a `json` tag),
 * the contents of the first fence are parsed; otherwise the whole text is.
 *
 * @param text Trimmed assistant reply.
 * @returns The parsed object, or `null` when the candidate is not valid JSON or
 *          is not a plain JSON object (primitives, `null` and arrays are rejected).
 */
function parseEmbeddedJson(text: string): Record<string, unknown> | null {
  const fenced = /```(?:json)?\s*([\s\S]*?)\s*```/i.exec(text);
  const candidate = fenced ? fenced[1] : text;

  let parsed: unknown;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return null;
  }

  // Arrays are `typeof "object"` too, but they are not the keyed record callers expect.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return null;
  }
  return parsed as Record<string, unknown>;
}
336
+
337
/**
 * Maps a JSON reply onto `ImageAnalysis`.
 *
 * Fields are read from `json.analysis` when that is an object, otherwise from
 * the top level. `answer` is the first non-blank string among
 * `analysis.answer`, `json.answer`, `json.raw_text` and the raw reply, with a
 * placeholder sentence as the last resort.
 *
 * @param json        Parsed JSON reply.
 * @param fallbackRaw Raw reply text used when the JSON carries no answer.
 */
function fromJsonAnalysis(json: Record<string, unknown>, fallbackRaw: string): ImageAnalysis {
  const nested = json.analysis;
  const fields: Record<string, unknown> =
    nested && typeof nested === "object" ? (nested as Record<string, unknown>) : json;

  let answer = "No textual response returned.";
  for (const candidate of [fields.answer, json.answer, json.raw_text, fallbackRaw]) {
    const text = asString(candidate);
    if (text !== null) {
      answer = text;
      break;
    }
  }

  return {
    answer,
    ocr_text: asString(fields.ocr_text) ?? "",
    objects: asStringArray(fields.objects),
    scene: asString(fields.scene) ?? "",
    notable_details: asStringArray(fields.notable_details),
    safety_notes: asStringArray(fields.safety_notes),
  };
}
357
+
358
/**
 * Returns the trimmed value when it is a non-blank string, otherwise `null`.
 */
function asString(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const text = value.trim();
  return text === "" ? null : text;
}
365
+
366
/**
 * Returns the trimmed, non-blank string entries of an array; anything that is
 * not an array yields an empty list, and non-string entries are dropped.
 */
function asStringArray(value: unknown): string[] {
  const result: string[] = [];
  if (!Array.isArray(value)) {
    return result;
  }
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const text = entry.trim();
    if (text) {
      result.push(text);
    }
  }
  return result;
}
374
+
375
/**
 * Splits a comma- or newline-separated list into clean entries.
 *
 * Each entry is trimmed first and then loses one leading `-` or `*` bullet, so
 * bullets that follow a separator and a space (`"- a, - b"`) are removed too.
 * Empty entries are dropped.
 *
 * @param value Raw list text; an empty string yields an empty list.
 */
function splitList(value: string): string[] {
  if (!value) return [];
  return value
    .split(/[,\n]/)
    .map((item) => item.trim().replace(/^[-*]\s*/, ""))
    .filter(Boolean);
}
382
+
383
/**
 * Finds the first line of the form `key: value` (key matched case-insensitively
 * at the start of the line) and returns the trimmed value.
 *
 * @param lines Reply lines to scan.
 * @param key   Literal key text; it is escaped before being placed in the pattern.
 * @returns The value, or `null` when no line matches.
 */
function extractKeyValue(lines: string[], key: string): string | null {
  const matcher = new RegExp(`^${escapeRegExp(key)}\\s*:\\s*(.+)$`, "i");
  for (const candidate of lines) {
    const hit = matcher.exec(candidate);
    if (hit === null) continue;
    return hit[1].trim();
  }
  return null;
}
393
+
394
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result matches the text literally when embedded in a pattern.
 */
function escapeRegExp(value: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => "\\" + ch);
}
397
+
398
/**
 * Accepts base64 `data:` URLs and absolute http/https URLs; rejects everything
 * else, including strings that do not parse as a URL.
 */
function isValidImageUrl(url: string): boolean {
  if (url.startsWith("data:")) {
    // Only the base64 form with an explicit media type is accepted.
    return /^data:[^;]+;base64,/i.test(url);
  }

  let protocol: string;
  try {
    protocol = new URL(url).protocol;
  } catch {
    return false;
  }
  return protocol === "http:" || protocol === "https:";
}
409
+
410
/**
 * Reads the content type from a header map, trying the lowercase key first and
 * then `Content-Type`. Multi-valued headers are joined with ", "; a missing
 * header yields an empty string.
 */
function normalizeContentType(headers: Record<string, string | string[]>): string {
  const raw = headers["content-type"] ?? headers["Content-Type"];
  return Array.isArray(raw) ? raw.join(", ") : (raw ?? "");
}
415
+
416
/**
 * Produces a single-line excerpt of a response body for error messages:
 * whitespace runs collapse to one space, an empty result becomes `<empty>`,
 * and anything longer than 1024 characters is cut and suffixed with an ellipsis.
 */
function summarizeBodySnippet(body: string): string {
  const limit = 1024;
  const collapsed = body.replace(/\s+/g, " ").trim();
  if (collapsed === "") {
    return "<empty>";
  }
  return collapsed.length > limit ? `${collapsed.slice(0, limit)}…` : collapsed;
}
427
+
428
/**
 * Maps a file path's extension (case-insensitive) to an image MIME type.
 *
 * @param filePath Path or file name; only the extension is inspected.
 * @returns The MIME type, or `application/octet-stream` for unknown or missing extensions.
 */
function mimeFromExt(filePath: string): string {
  const ext = path.extname(filePath).slice(1).toLowerCase();
  const map: Record<string, string> = {
    png: "image/png",
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    gif: "image/gif",
    webp: "image/webp",
    bmp: "image/bmp",
    svg: "image/svg+xml",
    tif: "image/tiff",
    tiff: "image/tiff",
  };
  // Own-property check: a bare `map[ext]` would resolve inherited keys such as
  // "constructor" or "__proto__" to non-string values from Object.prototype.
  return Object.prototype.hasOwnProperty.call(map, ext) ? map[ext] : "application/octet-stream";
}
443
+
444
/**
 * Creates an `Error` tagged with a machine-readable `type` and marked as not
 * retryable.
 *
 * @param type    Error category read by callers (for example `invalid_request`).
 * @param message Human-readable description.
 */
function typedError(type: string, message: string): Error & { type: string; retryable: boolean } {
  return Object.assign(new Error(message), { type, retryable: false });
}
@@ -0,0 +1,127 @@
1
+ import { routeRequest } from "../routing/router";
2
+ import { selectPoolCandidates } from "../pools/scheduler";
3
+ import { pickBestProviderModelByCapabilities } from "../providers/modelRegistry";
4
+ import { StoragePaths } from "../storage/files";
5
+ import { VideoGenerationRequest } from "../types";
6
+
7
/** Outcome of `runVideoGeneration`: the upstream response plus routing details. */
export interface VideoGenerationRunResult {
  /** Model name the request was routed with (the requested one or the default pick). */
  model: string;
  /** Status code of the upstream response. */
  statusCode: number;
  /** Headers of the upstream response. */
  headers: Record<string, string | string[]>;
  /** Parsed JSON when the response declared JSON and parsed cleanly, otherwise the raw `Buffer`. */
  payload: unknown;
  /** Which endpoint served the request and under which upstream model name. */
  route: {
    endpointId: string;
    endpointName: string;
    upstreamModel: string;
  };
}
18
+
19
/**
 * Returns the caller's requested model when one is given, otherwise the
 * default video model chosen by `pickDefaultVideoModel`.
 *
 * @param paths          Storage locations passed to the default-model lookup.
 * @param requestedModel Model named by the caller, if any.
 * @returns The model name, or `null` when no default is available.
 */
export async function resolveVideoGenerationModel(
  paths: StoragePaths,
  requestedModel?: string
): Promise<string | null> {
  return requestedModel ? requestedModel : pickDefaultVideoModel(paths);
}
28
+
29
/**
 * Routes a video-generation request to `/v1/videos/generations` and returns
 * the upstream response together with the route that served it.
 *
 * @param paths   Storage locations passed to model selection and routing.
 * @param request Generation request; its `model` is replaced by the resolved model.
 * @param headers Incoming request headers forwarded to the router.
 * @param signal  Abort signal passed to the routed request.
 * @returns Status, headers, payload and routing details of the upstream call.
 * @throws An error tagged `no_video_model` (not retryable) when no model can be
 *         resolved; routing and body-read errors propagate unchanged.
 */
export async function runVideoGeneration(
  paths: StoragePaths,
  request: VideoGenerationRequest,
  headers: Record<string, string | string[] | undefined>,
  signal: AbortSignal
): Promise<VideoGenerationRunResult> {
  const model = await resolveVideoGenerationModel(paths, request.model);
  if (!model) {
    throw Object.assign(
      new Error("No video generation model available. Add or enable a provider model."),
      { type: "no_video_model", retryable: false }
    );
  }

  const outcome = await routeRequest(
    paths,
    model,
    "/v1/videos/generations",
    { ...request, model } as Record<string, unknown>,
    headers,
    signal,
    {
      endpointType: "video",
      requiredInput: ["text"],
      requiredOutput: ["video"],
    }
  );

  const { attempt } = outcome;
  const { payload } = await readBody(attempt.response);

  return {
    model,
    statusCode: attempt.response.statusCode,
    headers: attempt.response.headers,
    payload,
    route: {
      endpointId: attempt.endpoint.id,
      endpointName: attempt.endpoint.name,
      upstreamModel: attempt.upstreamModel,
    },
  };
}
74
+
75
/**
 * Picks a model for text-to-video when the caller named none.
 *
 * The "smart" pool is preferred when the scheduler reports at least one
 * candidate for the `video_generations` operation; otherwise the model
 * registry is asked for the best provider model with text input and video output.
 *
 * @param paths Storage locations passed to the scheduler and registry.
 * @returns The model name, or `null` when neither lookup finds one.
 */
async function pickDefaultVideoModel(paths: StoragePaths): Promise<string | null> {
  const smartPool = await selectPoolCandidates(
    paths,
    "smart",
    {
      requiredInput: ["text"],
      requiredOutput: ["video"],
    },
    {
      operation: "video_generations",
      stream: false,
    }
  );
  if (smartPool && smartPool.candidates.length > 0) {
    return "smart";
  }

  const capabilityMatch = await pickBestProviderModelByCapabilities(
    paths,
    { requiredInput: ["text"], requiredOutput: ["video"] },
    "video"
  );
  return capabilityMatch ? capabilityMatch : null;
}
102
+
103
/**
 * Drains the upstream response body and decodes it when it is JSON.
 *
 * The content type is compared case-insensitively, since media type names are
 * not case-sensitive. A body that declares JSON but fails to parse is returned
 * as the raw buffer rather than raising, as is every non-JSON body.
 *
 * @param response Upstream response with a readable body and headers.
 * @returns The parsed JSON value, or the raw `Buffer`.
 */
async function readBody(response: {
  body: NodeJS.ReadableStream;
  headers: Record<string, string | string[]>;
}): Promise<{ payload: unknown }> {
  const chunks: Buffer[] = [];
  for await (const chunk of response.body) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  const buffer = Buffer.concat(chunks);

  const contentType = normalizeContentType(response.headers).toLowerCase();
  if (!contentType.includes("application/json")) {
    return { payload: buffer };
  }
  try {
    return { payload: JSON.parse(buffer.toString("utf8")) };
  } catch {
    // Best effort: hand back the bytes so the caller can still relay them.
    return { payload: buffer };
  }
}
122
+
123
/**
 * Reads the content type from a header map, trying the lowercase key first and
 * then `Content-Type`. Multi-valued headers are joined with ", "; a missing
 * header yields an empty string.
 */
function normalizeContentType(headers: Record<string, string | string[]>): string {
  const raw = headers["content-type"] ?? headers["Content-Type"];
  return Array.isArray(raw) ? raw.join(", ") : (raw ?? "");
}