ruflo 3.10.35 → 3.10.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (498) hide show
  1. package/README.md +416 -416
  2. package/bin/ruflo.js +77 -77
  3. package/package.json +113 -113
  4. package/src/chat-ui/Dockerfile +25 -25
  5. package/src/chat-ui/patch-mcp-url-safety.sh +28 -28
  6. package/src/config/config.example.json +76 -76
  7. package/src/mcp-bridge/Dockerfile +45 -45
  8. package/src/mcp-bridge/index.js +1692 -1692
  9. package/src/mcp-bridge/mcp-stdio-kernel.js +159 -159
  10. package/src/mcp-bridge/package.json +17 -17
  11. package/src/mcp-bridge/test-harness.js +470 -470
  12. package/src/nginx/Dockerfile +10 -10
  13. package/src/nginx/nginx.conf +67 -67
  14. package/src/nginx/static/favicon-dark.svg +4 -4
  15. package/src/nginx/static/favicon.svg +4 -4
  16. package/src/nginx/static/icon.svg +5 -5
  17. package/src/nginx/static/logo.svg +9 -9
  18. package/src/nginx/static/manifest.json +22 -22
  19. package/src/nginx/static/welcome.js +184 -184
  20. package/src/ruvocal/.claude/skills/add-model-descriptions/SKILL.md +73 -73
  21. package/src/ruvocal/.devcontainer/Dockerfile +9 -9
  22. package/src/ruvocal/.devcontainer/devcontainer.json +36 -36
  23. package/src/ruvocal/.dockerignore +16 -16
  24. package/src/ruvocal/.eslintignore +13 -13
  25. package/src/ruvocal/.eslintrc.cjs +45 -45
  26. package/src/ruvocal/.gcloudignore +18 -18
  27. package/src/ruvocal/.github/ISSUE_TEMPLATE/bug-report--chat-ui-.md +43 -43
  28. package/src/ruvocal/.github/ISSUE_TEMPLATE/config-support.md +9 -9
  29. package/src/ruvocal/.github/ISSUE_TEMPLATE/feature-request--chat-ui-.md +17 -17
  30. package/src/ruvocal/.github/ISSUE_TEMPLATE/huggingchat.md +11 -11
  31. package/src/ruvocal/.github/release.yml +16 -16
  32. package/src/ruvocal/.github/workflows/build-docs.yml +18 -18
  33. package/src/ruvocal/.github/workflows/build-image.yml +142 -142
  34. package/src/ruvocal/.github/workflows/build-pr-docs.yml +20 -20
  35. package/src/ruvocal/.github/workflows/deploy-dev.yml +63 -63
  36. package/src/ruvocal/.github/workflows/deploy-prod.yml +78 -78
  37. package/src/ruvocal/.github/workflows/lint-and-test.yml +84 -84
  38. package/src/ruvocal/.github/workflows/slugify.yaml +72 -72
  39. package/src/ruvocal/.github/workflows/trufflehog.yml +17 -17
  40. package/src/ruvocal/.github/workflows/upload-pr-documentation.yml +16 -16
  41. package/src/ruvocal/.husky/lint-stage-config.js +4 -4
  42. package/src/ruvocal/.husky/pre-commit +2 -2
  43. package/src/ruvocal/.prettierignore +14 -14
  44. package/src/ruvocal/.prettierrc +7 -7
  45. package/src/ruvocal/CLAUDE.md +126 -126
  46. package/src/ruvocal/Dockerfile +96 -96
  47. package/src/ruvocal/LICENSE +202 -202
  48. package/src/ruvocal/PRIVACY.md +41 -41
  49. package/src/ruvocal/README.md +164 -164
  50. package/src/ruvocal/chart/Chart.yaml +5 -5
  51. package/src/ruvocal/chart/env/dev.yaml +260 -260
  52. package/src/ruvocal/chart/env/prod.yaml +273 -273
  53. package/src/ruvocal/chart/templates/_helpers.tpl +22 -22
  54. package/src/ruvocal/chart/templates/config.yaml +10 -10
  55. package/src/ruvocal/chart/templates/deployment.yaml +81 -81
  56. package/src/ruvocal/chart/templates/hpa.yaml +45 -45
  57. package/src/ruvocal/chart/templates/infisical.yaml +24 -24
  58. package/src/ruvocal/chart/templates/ingress-internal.yaml +32 -32
  59. package/src/ruvocal/chart/templates/ingress.yaml +32 -32
  60. package/src/ruvocal/chart/templates/network-policy.yaml +36 -36
  61. package/src/ruvocal/chart/templates/service-account.yaml +13 -13
  62. package/src/ruvocal/chart/templates/service-monitor.yaml +17 -17
  63. package/src/ruvocal/chart/templates/service.yaml +21 -21
  64. package/src/ruvocal/chart/values.yaml +73 -73
  65. package/src/ruvocal/cloudbuild.yaml +68 -68
  66. package/src/ruvocal/config/branding.env.example +19 -19
  67. package/src/ruvocal/docker-compose.yml +21 -21
  68. package/src/ruvocal/docs/adr/ADR-029-HUGGINGFACE-CHAT-UI-CLOUD-RUN.md +1236 -1236
  69. package/src/ruvocal/docs/adr/ADR-033-RUVECTOR-RUFLO-MCP-INTEGRATION.md +111 -111
  70. package/src/ruvocal/docs/adr/ADR-034-OPTIONAL-MCP-BACKENDS.md +117 -117
  71. package/src/ruvocal/docs/adr/ADR-035-MCP-TOOL-GROUPS.md +186 -186
  72. package/src/ruvocal/docs/adr/ADR-037-AUTOPILOT-CHAT-MODE.md +1500 -1500
  73. package/src/ruvocal/docs/adr/ADR-038-RUVOCAL-FORK.md +286 -286
  74. package/src/ruvocal/docs/source/_toctree.yml +30 -30
  75. package/src/ruvocal/docs/source/configuration/common-issues.md +38 -38
  76. package/src/ruvocal/docs/source/configuration/llm-router.md +105 -105
  77. package/src/ruvocal/docs/source/configuration/mcp-tools.md +84 -84
  78. package/src/ruvocal/docs/source/configuration/metrics.md +9 -9
  79. package/src/ruvocal/docs/source/configuration/open-id.md +57 -57
  80. package/src/ruvocal/docs/source/configuration/overview.md +89 -89
  81. package/src/ruvocal/docs/source/configuration/theming.md +20 -20
  82. package/src/ruvocal/docs/source/developing/architecture.md +48 -48
  83. package/src/ruvocal/docs/source/index.md +53 -53
  84. package/src/ruvocal/docs/source/installation/docker.md +43 -43
  85. package/src/ruvocal/docs/source/installation/helm.md +43 -43
  86. package/src/ruvocal/docs/source/installation/local.md +62 -62
  87. package/src/ruvocal/entrypoint.sh +18 -18
  88. package/src/ruvocal/mcp-bridge/Dockerfile +45 -45
  89. package/src/ruvocal/mcp-bridge/cloudbuild.yaml +49 -49
  90. package/src/ruvocal/mcp-bridge/index.js +1902 -1902
  91. package/src/ruvocal/mcp-bridge/mcp-stdio-kernel.js +159 -159
  92. package/src/ruvocal/mcp-bridge/package-lock.json +762 -762
  93. package/src/ruvocal/mcp-bridge/package.json +17 -17
  94. package/src/ruvocal/mcp-bridge/test-harness.js +470 -470
  95. package/src/ruvocal/package-lock.json +11741 -11741
  96. package/src/ruvocal/package.json +121 -121
  97. package/src/ruvocal/postcss.config.js +6 -6
  98. package/src/ruvocal/rvf.manifest.json +204 -204
  99. package/src/ruvocal/scripts/config.ts +64 -64
  100. package/src/ruvocal/scripts/generate-welcome.mjs +181 -181
  101. package/src/ruvocal/scripts/populate.ts +288 -288
  102. package/src/ruvocal/scripts/samples.txt +194 -194
  103. package/src/ruvocal/scripts/setups/vitest-setup-server.ts +44 -44
  104. package/src/ruvocal/scripts/updateLocalEnv.ts +48 -48
  105. package/src/ruvocal/src/ambient.d.ts +7 -7
  106. package/src/ruvocal/src/app.d.ts +29 -29
  107. package/src/ruvocal/src/app.html +53 -53
  108. package/src/ruvocal/src/hooks.server.ts +32 -32
  109. package/src/ruvocal/src/hooks.ts +6 -6
  110. package/src/ruvocal/src/lib/APIClient.ts +148 -148
  111. package/src/ruvocal/src/lib/actions/clickOutside.ts +18 -18
  112. package/src/ruvocal/src/lib/actions/snapScrollToBottom.ts +346 -346
  113. package/src/ruvocal/src/lib/buildPrompt.ts +33 -33
  114. package/src/ruvocal/src/lib/components/AnnouncementBanner.svelte +20 -20
  115. package/src/ruvocal/src/lib/components/BackgroundGenerationPoller.svelte +168 -168
  116. package/src/ruvocal/src/lib/components/CodeBlock.svelte +73 -73
  117. package/src/ruvocal/src/lib/components/CopyToClipBoardBtn.svelte +92 -92
  118. package/src/ruvocal/src/lib/components/DeleteConversationModal.svelte +75 -75
  119. package/src/ruvocal/src/lib/components/EditConversationModal.svelte +100 -100
  120. package/src/ruvocal/src/lib/components/ExpandNavigation.svelte +22 -22
  121. package/src/ruvocal/src/lib/components/FoundationBackground.svelte +242 -242
  122. package/src/ruvocal/src/lib/components/HoverTooltip.svelte +44 -44
  123. package/src/ruvocal/src/lib/components/HtmlPreviewModal.svelte +143 -143
  124. package/src/ruvocal/src/lib/components/InfiniteScroll.svelte +50 -50
  125. package/src/ruvocal/src/lib/components/MobileNav.svelte +300 -300
  126. package/src/ruvocal/src/lib/components/Modal.svelte +115 -115
  127. package/src/ruvocal/src/lib/components/ModelCardMetadata.svelte +71 -71
  128. package/src/ruvocal/src/lib/components/NavConversationItem.svelte +151 -151
  129. package/src/ruvocal/src/lib/components/NavMenu.svelte +313 -313
  130. package/src/ruvocal/src/lib/components/Pagination.svelte +97 -97
  131. package/src/ruvocal/src/lib/components/PaginationArrow.svelte +27 -27
  132. package/src/ruvocal/src/lib/components/Portal.svelte +24 -24
  133. package/src/ruvocal/src/lib/components/RetryBtn.svelte +18 -18
  134. package/src/ruvocal/src/lib/components/RuFloUniverse.svelte +185 -185
  135. package/src/ruvocal/src/lib/components/RufloHelpModal.svelte +411 -411
  136. package/src/ruvocal/src/lib/components/ScrollToBottomBtn.svelte +47 -47
  137. package/src/ruvocal/src/lib/components/ScrollToPreviousBtn.svelte +77 -77
  138. package/src/ruvocal/src/lib/components/ShareConversationModal.svelte +182 -182
  139. package/src/ruvocal/src/lib/components/StopGeneratingBtn.svelte +69 -69
  140. package/src/ruvocal/src/lib/components/SubscribeModal.svelte +87 -87
  141. package/src/ruvocal/src/lib/components/Switch.svelte +36 -36
  142. package/src/ruvocal/src/lib/components/SystemPromptModal.svelte +44 -44
  143. package/src/ruvocal/src/lib/components/Toast.svelte +27 -27
  144. package/src/ruvocal/src/lib/components/Tooltip.svelte +30 -30
  145. package/src/ruvocal/src/lib/components/WelcomeModal.svelte +46 -46
  146. package/src/ruvocal/src/lib/components/chat/Alternatives.svelte +77 -77
  147. package/src/ruvocal/src/lib/components/chat/BlockWrapper.svelte +72 -72
  148. package/src/ruvocal/src/lib/components/chat/ChatInput.svelte +490 -490
  149. package/src/ruvocal/src/lib/components/chat/ChatIntroduction.svelte +123 -123
  150. package/src/ruvocal/src/lib/components/chat/ChatMessage.svelte +548 -548
  151. package/src/ruvocal/src/lib/components/chat/ChatWindow.svelte +1057 -1057
  152. package/src/ruvocal/src/lib/components/chat/FileDropzone.svelte +92 -92
  153. package/src/ruvocal/src/lib/components/chat/ImageLightbox.svelte +66 -66
  154. package/src/ruvocal/src/lib/components/chat/MarkdownBlock.svelte +23 -23
  155. package/src/ruvocal/src/lib/components/chat/MarkdownRenderer.svelte +69 -69
  156. package/src/ruvocal/src/lib/components/chat/MarkdownRenderer.svelte.test.ts +58 -58
  157. package/src/ruvocal/src/lib/components/chat/MessageAvatar.svelte +103 -103
  158. package/src/ruvocal/src/lib/components/chat/ModelSwitch.svelte +64 -64
  159. package/src/ruvocal/src/lib/components/chat/OpenReasoningResults.svelte +81 -81
  160. package/src/ruvocal/src/lib/components/chat/TaskGroup.svelte +88 -88
  161. package/src/ruvocal/src/lib/components/chat/ToolUpdate.svelte +273 -273
  162. package/src/ruvocal/src/lib/components/chat/UploadedFile.svelte +253 -253
  163. package/src/ruvocal/src/lib/components/chat/UrlFetchModal.svelte +203 -203
  164. package/src/ruvocal/src/lib/components/chat/VoiceRecorder.svelte +214 -214
  165. package/src/ruvocal/src/lib/components/icons/IconBurger.svelte +20 -20
  166. package/src/ruvocal/src/lib/components/icons/IconCheap.svelte +20 -20
  167. package/src/ruvocal/src/lib/components/icons/IconChevron.svelte +24 -24
  168. package/src/ruvocal/src/lib/components/icons/IconDazzled.svelte +40 -40
  169. package/src/ruvocal/src/lib/components/icons/IconFast.svelte +20 -20
  170. package/src/ruvocal/src/lib/components/icons/IconLoading.svelte +22 -22
  171. package/src/ruvocal/src/lib/components/icons/IconMCP.svelte +28 -28
  172. package/src/ruvocal/src/lib/components/icons/IconMoon.svelte +21 -21
  173. package/src/ruvocal/src/lib/components/icons/IconNew.svelte +20 -20
  174. package/src/ruvocal/src/lib/components/icons/IconOmni.svelte +90 -90
  175. package/src/ruvocal/src/lib/components/icons/IconPaperclip.svelte +24 -24
  176. package/src/ruvocal/src/lib/components/icons/IconPro.svelte +37 -37
  177. package/src/ruvocal/src/lib/components/icons/IconShare.svelte +21 -21
  178. package/src/ruvocal/src/lib/components/icons/IconSun.svelte +93 -93
  179. package/src/ruvocal/src/lib/components/icons/Logo.svelte +68 -68
  180. package/src/ruvocal/src/lib/components/icons/LogoHuggingFaceBorderless.svelte +54 -54
  181. package/src/ruvocal/src/lib/components/mcp/AddServerForm.svelte +250 -250
  182. package/src/ruvocal/src/lib/components/mcp/MCPServerManager.svelte +185 -185
  183. package/src/ruvocal/src/lib/components/mcp/ServerCard.svelte +203 -203
  184. package/src/ruvocal/src/lib/components/players/AudioPlayer.svelte +82 -82
  185. package/src/ruvocal/src/lib/components/voice/AudioWaveform.svelte +96 -96
  186. package/src/ruvocal/src/lib/components/wasm/GalleryPanel.svelte +357 -357
  187. package/src/ruvocal/src/lib/constants/mcpExamples.ts +114 -114
  188. package/src/ruvocal/src/lib/constants/mime.ts +11 -11
  189. package/src/ruvocal/src/lib/constants/pagination.ts +1 -1
  190. package/src/ruvocal/src/lib/constants/publicSepToken.ts +1 -1
  191. package/src/ruvocal/src/lib/constants/routerExamples.ts +133 -133
  192. package/src/ruvocal/src/lib/constants/rvagentPresets.ts +206 -206
  193. package/src/ruvocal/src/lib/createShareLink.ts +27 -27
  194. package/src/ruvocal/src/lib/jobs/refresh-conversation-stats.ts +297 -297
  195. package/src/ruvocal/src/lib/migrations/lock.ts +56 -56
  196. package/src/ruvocal/src/lib/migrations/migrations.spec.ts +74 -74
  197. package/src/ruvocal/src/lib/migrations/migrations.ts +109 -109
  198. package/src/ruvocal/src/lib/migrations/routines/01-update-search-assistants.ts +50 -50
  199. package/src/ruvocal/src/lib/migrations/routines/02-update-assistants-models.ts +48 -48
  200. package/src/ruvocal/src/lib/migrations/routines/04-update-message-updates.ts +151 -151
  201. package/src/ruvocal/src/lib/migrations/routines/05-update-message-files.ts +56 -56
  202. package/src/ruvocal/src/lib/migrations/routines/06-trim-message-updates.ts +56 -56
  203. package/src/ruvocal/src/lib/migrations/routines/08-update-featured-to-review.ts +32 -32
  204. package/src/ruvocal/src/lib/migrations/routines/09-delete-empty-conversations.spec.ts +214 -214
  205. package/src/ruvocal/src/lib/migrations/routines/09-delete-empty-conversations.ts +88 -88
  206. package/src/ruvocal/src/lib/migrations/routines/10-update-reports-assistantid.ts +29 -29
  207. package/src/ruvocal/src/lib/migrations/routines/index.ts +15 -15
  208. package/src/ruvocal/src/lib/server/__tests__/conversation-stop-generating.spec.ts +103 -103
  209. package/src/ruvocal/src/lib/server/abortRegistry.ts +57 -57
  210. package/src/ruvocal/src/lib/server/abortedGenerations.ts +43 -43
  211. package/src/ruvocal/src/lib/server/adminToken.ts +62 -62
  212. package/src/ruvocal/src/lib/server/api/__tests__/conversations-id.spec.ts +296 -296
  213. package/src/ruvocal/src/lib/server/api/__tests__/conversations-message.spec.ts +216 -216
  214. package/src/ruvocal/src/lib/server/api/__tests__/conversations.spec.ts +235 -235
  215. package/src/ruvocal/src/lib/server/api/__tests__/misc.spec.ts +72 -72
  216. package/src/ruvocal/src/lib/server/api/__tests__/testHelpers.ts +86 -86
  217. package/src/ruvocal/src/lib/server/api/__tests__/user-reports.spec.ts +78 -78
  218. package/src/ruvocal/src/lib/server/api/__tests__/user.spec.ts +239 -239
  219. package/src/ruvocal/src/lib/server/api/types.ts +37 -37
  220. package/src/ruvocal/src/lib/server/api/utils/requireAuth.ts +22 -22
  221. package/src/ruvocal/src/lib/server/api/utils/resolveConversation.ts +69 -69
  222. package/src/ruvocal/src/lib/server/api/utils/resolveModel.ts +27 -27
  223. package/src/ruvocal/src/lib/server/api/utils/superjsonResponse.ts +15 -15
  224. package/src/ruvocal/src/lib/server/apiToken.ts +11 -11
  225. package/src/ruvocal/src/lib/server/auth.ts +554 -554
  226. package/src/ruvocal/src/lib/server/config.ts +187 -187
  227. package/src/ruvocal/src/lib/server/conversation.ts +83 -83
  228. package/src/ruvocal/src/lib/server/database/__tests__/rvf.spec.ts +709 -709
  229. package/src/ruvocal/src/lib/server/database/postgres.ts +700 -700
  230. package/src/ruvocal/src/lib/server/database/rvf.ts +1078 -1078
  231. package/src/ruvocal/src/lib/server/database.ts +145 -145
  232. package/src/ruvocal/src/lib/server/endpoints/document.ts +68 -68
  233. package/src/ruvocal/src/lib/server/endpoints/endpoints.ts +43 -43
  234. package/src/ruvocal/src/lib/server/endpoints/images.ts +211 -211
  235. package/src/ruvocal/src/lib/server/endpoints/openai/endpointOai.ts +266 -266
  236. package/src/ruvocal/src/lib/server/endpoints/openai/openAIChatToTextGenerationStream.ts +212 -212
  237. package/src/ruvocal/src/lib/server/endpoints/openai/openAICompletionToTextGenerationStream.ts +32 -32
  238. package/src/ruvocal/src/lib/server/endpoints/preprocessMessages.ts +61 -61
  239. package/src/ruvocal/src/lib/server/exitHandler.ts +59 -59
  240. package/src/ruvocal/src/lib/server/files/downloadFile.ts +34 -34
  241. package/src/ruvocal/src/lib/server/files/uploadFile.ts +29 -29
  242. package/src/ruvocal/src/lib/server/findRepoRoot.ts +13 -13
  243. package/src/ruvocal/src/lib/server/generateFromDefaultEndpoint.ts +46 -46
  244. package/src/ruvocal/src/lib/server/hooks/error.ts +37 -37
  245. package/src/ruvocal/src/lib/server/hooks/fetch.ts +22 -22
  246. package/src/ruvocal/src/lib/server/hooks/handle.ts +250 -250
  247. package/src/ruvocal/src/lib/server/hooks/init.ts +51 -51
  248. package/src/ruvocal/src/lib/server/isURLLocal.spec.ts +31 -31
  249. package/src/ruvocal/src/lib/server/isURLLocal.ts +74 -74
  250. package/src/ruvocal/src/lib/server/logger.ts +42 -42
  251. package/src/ruvocal/src/lib/server/mcp/clientPool.spec.ts +175 -175
  252. package/src/ruvocal/src/lib/server/mcp/hf.ts +32 -32
  253. package/src/ruvocal/src/lib/server/mcp/httpClient.ts +122 -122
  254. package/src/ruvocal/src/lib/server/mcp/registry.ts +76 -76
  255. package/src/ruvocal/src/lib/server/mcp/tools.ts +196 -196
  256. package/src/ruvocal/src/lib/server/metrics.ts +255 -255
  257. package/src/ruvocal/src/lib/server/models.ts +518 -518
  258. package/src/ruvocal/src/lib/server/requestContext.ts +55 -55
  259. package/src/ruvocal/src/lib/server/router/arch.ts +230 -230
  260. package/src/ruvocal/src/lib/server/router/endpoint.ts +316 -316
  261. package/src/ruvocal/src/lib/server/router/multimodal.ts +28 -28
  262. package/src/ruvocal/src/lib/server/router/policy.ts +49 -49
  263. package/src/ruvocal/src/lib/server/router/toolsRoute.ts +51 -51
  264. package/src/ruvocal/src/lib/server/router/types.ts +21 -21
  265. package/src/ruvocal/src/lib/server/sendSlack.ts +23 -23
  266. package/src/ruvocal/src/lib/server/textGeneration/generate.ts +258 -258
  267. package/src/ruvocal/src/lib/server/textGeneration/index.ts +96 -96
  268. package/src/ruvocal/src/lib/server/textGeneration/mcp/fileRefs.ts +155 -155
  269. package/src/ruvocal/src/lib/server/textGeneration/mcp/routerResolution.ts +108 -108
  270. package/src/ruvocal/src/lib/server/textGeneration/mcp/runMcpFlow.ts +831 -831
  271. package/src/ruvocal/src/lib/server/textGeneration/mcp/toolInvocation.ts +349 -349
  272. package/src/ruvocal/src/lib/server/textGeneration/mcp/wasmTools.test.ts +633 -633
  273. package/src/ruvocal/src/lib/server/textGeneration/reasoning.ts +23 -23
  274. package/src/ruvocal/src/lib/server/textGeneration/title.ts +83 -83
  275. package/src/ruvocal/src/lib/server/textGeneration/types.ts +28 -28
  276. package/src/ruvocal/src/lib/server/textGeneration/utils/prepareFiles.ts +88 -88
  277. package/src/ruvocal/src/lib/server/textGeneration/utils/routing.ts +21 -21
  278. package/src/ruvocal/src/lib/server/textGeneration/utils/toolPrompt.ts +49 -49
  279. package/src/ruvocal/src/lib/server/urlSafety.ts +77 -77
  280. package/src/ruvocal/src/lib/server/usageLimits.ts +30 -30
  281. package/src/ruvocal/src/lib/stores/autopilotStore.svelte.ts +175 -175
  282. package/src/ruvocal/src/lib/stores/backgroundGenerations.svelte.ts +32 -32
  283. package/src/ruvocal/src/lib/stores/backgroundGenerations.ts +1 -1
  284. package/src/ruvocal/src/lib/stores/errors.ts +9 -9
  285. package/src/ruvocal/src/lib/stores/isAborted.ts +3 -3
  286. package/src/ruvocal/src/lib/stores/isPro.ts +4 -4
  287. package/src/ruvocal/src/lib/stores/loading.ts +3 -3
  288. package/src/ruvocal/src/lib/stores/mcpServers.ts +534 -534
  289. package/src/ruvocal/src/lib/stores/pendingChatInput.ts +3 -3
  290. package/src/ruvocal/src/lib/stores/pendingMessage.ts +9 -9
  291. package/src/ruvocal/src/lib/stores/settings.ts +182 -182
  292. package/src/ruvocal/src/lib/stores/shareModal.ts +13 -13
  293. package/src/ruvocal/src/lib/stores/titleUpdate.ts +8 -8
  294. package/src/ruvocal/src/lib/stores/wasmMcp.ts +472 -472
  295. package/src/ruvocal/src/lib/switchTheme.ts +124 -124
  296. package/src/ruvocal/src/lib/types/AbortedGeneration.ts +8 -8
  297. package/src/ruvocal/src/lib/types/Assistant.ts +31 -31
  298. package/src/ruvocal/src/lib/types/AssistantStats.ts +11 -11
  299. package/src/ruvocal/src/lib/types/ConfigKey.ts +4 -4
  300. package/src/ruvocal/src/lib/types/ConvSidebar.ts +9 -9
  301. package/src/ruvocal/src/lib/types/Conversation.ts +27 -27
  302. package/src/ruvocal/src/lib/types/ConversationStats.ts +13 -13
  303. package/src/ruvocal/src/lib/types/Message.ts +41 -41
  304. package/src/ruvocal/src/lib/types/MessageEvent.ts +10 -10
  305. package/src/ruvocal/src/lib/types/MessageUpdate.ts +139 -139
  306. package/src/ruvocal/src/lib/types/MigrationResult.ts +7 -7
  307. package/src/ruvocal/src/lib/types/Model.ts +23 -23
  308. package/src/ruvocal/src/lib/types/Report.ts +12 -12
  309. package/src/ruvocal/src/lib/types/Review.ts +6 -6
  310. package/src/ruvocal/src/lib/types/Semaphore.ts +19 -19
  311. package/src/ruvocal/src/lib/types/Session.ts +22 -22
  312. package/src/ruvocal/src/lib/types/Settings.ts +93 -93
  313. package/src/ruvocal/src/lib/types/SharedConversation.ts +9 -9
  314. package/src/ruvocal/src/lib/types/Template.ts +6 -6
  315. package/src/ruvocal/src/lib/types/Timestamps.ts +4 -4
  316. package/src/ruvocal/src/lib/types/TokenCache.ts +6 -6
  317. package/src/ruvocal/src/lib/types/Tool.ts +77 -77
  318. package/src/ruvocal/src/lib/types/UrlDependency.ts +5 -5
  319. package/src/ruvocal/src/lib/types/User.ts +14 -14
  320. package/src/ruvocal/src/lib/utils/PublicConfig.svelte.ts +75 -75
  321. package/src/ruvocal/src/lib/utils/auth.ts +17 -17
  322. package/src/ruvocal/src/lib/utils/chunk.ts +33 -33
  323. package/src/ruvocal/src/lib/utils/cookiesAreEnabled.ts +13 -13
  324. package/src/ruvocal/src/lib/utils/debounce.ts +17 -17
  325. package/src/ruvocal/src/lib/utils/deepestChild.ts +6 -6
  326. package/src/ruvocal/src/lib/utils/favicon.ts +21 -21
  327. package/src/ruvocal/src/lib/utils/fetchJSON.ts +23 -23
  328. package/src/ruvocal/src/lib/utils/file2base64.ts +14 -14
  329. package/src/ruvocal/src/lib/utils/formatUserCount.ts +37 -37
  330. package/src/ruvocal/src/lib/utils/generationState.spec.ts +75 -75
  331. package/src/ruvocal/src/lib/utils/generationState.ts +26 -26
  332. package/src/ruvocal/src/lib/utils/getHref.ts +41 -41
  333. package/src/ruvocal/src/lib/utils/getReturnFromGenerator.ts +7 -7
  334. package/src/ruvocal/src/lib/utils/haptics.ts +64 -64
  335. package/src/ruvocal/src/lib/utils/hashConv.ts +12 -12
  336. package/src/ruvocal/src/lib/utils/hf.ts +17 -17
  337. package/src/ruvocal/src/lib/utils/isDesktop.ts +7 -7
  338. package/src/ruvocal/src/lib/utils/isUrl.ts +8 -8
  339. package/src/ruvocal/src/lib/utils/isVirtualKeyboard.ts +16 -16
  340. package/src/ruvocal/src/lib/utils/loadAttachmentsFromUrls.ts +115 -115
  341. package/src/ruvocal/src/lib/utils/marked.spec.ts +96 -96
  342. package/src/ruvocal/src/lib/utils/marked.ts +531 -531
  343. package/src/ruvocal/src/lib/utils/mcpValidation.ts +147 -147
  344. package/src/ruvocal/src/lib/utils/mergeAsyncGenerators.ts +38 -38
  345. package/src/ruvocal/src/lib/utils/messageUpdates.spec.ts +262 -262
  346. package/src/ruvocal/src/lib/utils/messageUpdates.ts +324 -324
  347. package/src/ruvocal/src/lib/utils/mime.ts +56 -56
  348. package/src/ruvocal/src/lib/utils/models.ts +14 -14
  349. package/src/ruvocal/src/lib/utils/parseBlocks.ts +120 -120
  350. package/src/ruvocal/src/lib/utils/parseIncompleteMarkdown.ts +644 -644
  351. package/src/ruvocal/src/lib/utils/parseStringToList.ts +10 -10
  352. package/src/ruvocal/src/lib/utils/randomUuid.ts +14 -14
  353. package/src/ruvocal/src/lib/utils/searchTokens.ts +33 -33
  354. package/src/ruvocal/src/lib/utils/sha256.ts +7 -7
  355. package/src/ruvocal/src/lib/utils/stringifyError.ts +12 -12
  356. package/src/ruvocal/src/lib/utils/sum.ts +3 -3
  357. package/src/ruvocal/src/lib/utils/template.spec.ts +59 -59
  358. package/src/ruvocal/src/lib/utils/template.ts +53 -53
  359. package/src/ruvocal/src/lib/utils/timeout.ts +9 -9
  360. package/src/ruvocal/src/lib/utils/toolProgress.spec.ts +46 -46
  361. package/src/ruvocal/src/lib/utils/toolProgress.ts +11 -11
  362. package/src/ruvocal/src/lib/utils/tree/addChildren.spec.ts +102 -102
  363. package/src/ruvocal/src/lib/utils/tree/addChildren.ts +48 -48
  364. package/src/ruvocal/src/lib/utils/tree/addSibling.spec.ts +81 -81
  365. package/src/ruvocal/src/lib/utils/tree/addSibling.ts +41 -41
  366. package/src/ruvocal/src/lib/utils/tree/buildSubtree.spec.ts +110 -110
  367. package/src/ruvocal/src/lib/utils/tree/buildSubtree.ts +24 -24
  368. package/src/ruvocal/src/lib/utils/tree/convertLegacyConversation.spec.ts +31 -31
  369. package/src/ruvocal/src/lib/utils/tree/convertLegacyConversation.ts +36 -36
  370. package/src/ruvocal/src/lib/utils/tree/isMessageId.spec.ts +15 -15
  371. package/src/ruvocal/src/lib/utils/tree/isMessageId.ts +5 -5
  372. package/src/ruvocal/src/lib/utils/tree/tree.d.ts +14 -14
  373. package/src/ruvocal/src/lib/utils/tree/treeHelpers.spec.ts +167 -167
  374. package/src/ruvocal/src/lib/utils/updates.ts +39 -39
  375. package/src/ruvocal/src/lib/utils/urlParams.ts +13 -13
  376. package/src/ruvocal/src/lib/wasm/idb.ts +438 -438
  377. package/src/ruvocal/src/lib/wasm/index.ts +1213 -1213
  378. package/src/ruvocal/src/lib/wasm/tests/wasm-capabilities.test.ts +565 -565
  379. package/src/ruvocal/src/lib/wasm/wasm.worker.ts +332 -332
  380. package/src/ruvocal/src/lib/wasm/workerClient.ts +166 -166
  381. package/src/ruvocal/src/lib/workers/autopilotWorker.ts +221 -221
  382. package/src/ruvocal/src/lib/workers/detailFetchWorker.ts +100 -100
  383. package/src/ruvocal/src/lib/workers/markdownWorker.ts +61 -61
  384. package/src/ruvocal/src/routes/+error.svelte +20 -20
  385. package/src/ruvocal/src/routes/+layout.svelte +324 -324
  386. package/src/ruvocal/src/routes/+layout.ts +91 -91
  387. package/src/ruvocal/src/routes/+page.svelte +168 -168
  388. package/src/ruvocal/src/routes/.well-known/oauth-cimd/+server.ts +37 -37
  389. package/src/ruvocal/src/routes/__debug/openai/+server.ts +21 -21
  390. package/src/ruvocal/src/routes/admin/export/+server.ts +159 -159
  391. package/src/ruvocal/src/routes/admin/stats/compute/+server.ts +16 -16
  392. package/src/ruvocal/src/routes/api/conversation/[id]/+server.ts +40 -40
  393. package/src/ruvocal/src/routes/api/conversation/[id]/message/[messageId]/+server.ts +42 -42
  394. package/src/ruvocal/src/routes/api/conversations/+server.ts +48 -48
  395. package/src/ruvocal/src/routes/api/fetch-url/+server.ts +147 -147
  396. package/src/ruvocal/src/routes/api/mcp/health/+server.ts +292 -292
  397. package/src/ruvocal/src/routes/api/mcp/servers/+server.ts +32 -32
  398. package/src/ruvocal/src/routes/api/models/+server.ts +25 -25
  399. package/src/ruvocal/src/routes/api/transcribe/+server.ts +104 -104
  400. package/src/ruvocal/src/routes/api/user/+server.ts +15 -15
  401. package/src/ruvocal/src/routes/api/user/validate-token/+server.ts +20 -20
  402. package/src/ruvocal/src/routes/api/v2/conversations/+server.ts +48 -48
  403. package/src/ruvocal/src/routes/api/v2/conversations/[id]/+server.ts +94 -94
  404. package/src/ruvocal/src/routes/api/v2/conversations/[id]/message/[messageId]/+server.ts +43 -43
  405. package/src/ruvocal/src/routes/api/v2/conversations/import-share/+server.ts +23 -23
  406. package/src/ruvocal/src/routes/api/v2/debug/config/+server.ts +16 -16
  407. package/src/ruvocal/src/routes/api/v2/debug/refresh/+server.ts +30 -30
  408. package/src/ruvocal/src/routes/api/v2/export/+server.ts +196 -196
  409. package/src/ruvocal/src/routes/api/v2/feature-flags/+server.ts +14 -14
  410. package/src/ruvocal/src/routes/api/v2/models/+server.ts +38 -38
  411. package/src/ruvocal/src/routes/api/v2/models/[namespace]/+server.ts +8 -8
  412. package/src/ruvocal/src/routes/api/v2/models/[namespace]/[model]/+server.ts +8 -8
  413. package/src/ruvocal/src/routes/api/v2/models/[namespace]/[model]/subscribe/+server.ts +28 -28
  414. package/src/ruvocal/src/routes/api/v2/models/[namespace]/subscribe/+server.ts +28 -28
  415. package/src/ruvocal/src/routes/api/v2/models/old/+server.ts +7 -7
  416. package/src/ruvocal/src/routes/api/v2/models/refresh/+server.ts +33 -33
  417. package/src/ruvocal/src/routes/api/v2/public-config/+server.ts +7 -7
  418. package/src/ruvocal/src/routes/api/v2/user/+server.ts +17 -17
  419. package/src/ruvocal/src/routes/api/v2/user/billing-orgs/+server.ts +73 -73
  420. package/src/ruvocal/src/routes/api/v2/user/reports/+server.ts +17 -17
  421. package/src/ruvocal/src/routes/api/v2/user/settings/+server.ts +110 -110
  422. package/src/ruvocal/src/routes/conversation/+server.ts +115 -115
  423. package/src/ruvocal/src/routes/conversation/[id]/+page.svelte +586 -586
  424. package/src/ruvocal/src/routes/conversation/[id]/+page.ts +60 -60
  425. package/src/ruvocal/src/routes/conversation/[id]/+server.ts +740 -740
  426. package/src/ruvocal/src/routes/conversation/[id]/message/[messageId]/prompt/+server.ts +66 -66
  427. package/src/ruvocal/src/routes/conversation/[id]/share/+server.ts +69 -69
  428. package/src/ruvocal/src/routes/conversation/[id]/stop-generating/+server.ts +35 -35
  429. package/src/ruvocal/src/routes/healthcheck/+server.ts +3 -3
  430. package/src/ruvocal/src/routes/login/+server.ts +5 -5
  431. package/src/ruvocal/src/routes/login/callback/+server.ts +103 -103
  432. package/src/ruvocal/src/routes/login/callback/updateUser.spec.ts +157 -157
  433. package/src/ruvocal/src/routes/login/callback/updateUser.ts +215 -215
  434. package/src/ruvocal/src/routes/logout/+server.ts +18 -18
  435. package/src/ruvocal/src/routes/metrics/+server.ts +18 -18
  436. package/src/ruvocal/src/routes/models/+page.svelte +233 -233
  437. package/src/ruvocal/src/routes/models/[...model]/+page.svelte +161 -161
  438. package/src/ruvocal/src/routes/models/[...model]/+page.ts +14 -14
  439. package/src/ruvocal/src/routes/models/[...model]/thumbnail.png/+server.ts +64 -64
  440. package/src/ruvocal/src/routes/models/[...model]/thumbnail.png/ModelThumbnail.svelte +28 -28
  441. package/src/ruvocal/src/routes/privacy/+page.svelte +11 -11
  442. package/src/ruvocal/src/routes/r/[id]/+page.ts +34 -34
  443. package/src/ruvocal/src/routes/settings/(nav)/+layout.svelte +282 -282
  444. package/src/ruvocal/src/routes/settings/(nav)/+layout.ts +1 -1
  445. package/src/ruvocal/src/routes/settings/(nav)/+server.ts +59 -59
  446. package/src/ruvocal/src/routes/settings/(nav)/[...model]/+page.svelte +464 -464
  447. package/src/ruvocal/src/routes/settings/(nav)/[...model]/+page.ts +14 -14
  448. package/src/ruvocal/src/routes/settings/(nav)/application/+page.svelte +362 -362
  449. package/src/ruvocal/src/routes/settings/+layout.svelte +40 -40
  450. package/src/ruvocal/src/styles/highlight-js.css +195 -195
  451. package/src/ruvocal/src/styles/main.css +144 -144
  452. package/src/ruvocal/static/chatui/favicon-dark.svg +3 -3
  453. package/src/ruvocal/static/chatui/favicon-dev.svg +3 -3
  454. package/src/ruvocal/static/chatui/favicon.svg +3 -3
  455. package/src/ruvocal/static/chatui/icon.svg +3 -3
  456. package/src/ruvocal/static/chatui/logo.svg +7 -7
  457. package/src/ruvocal/static/chatui/manifest.json +54 -54
  458. package/src/ruvocal/static/chatui/welcome.js +184 -184
  459. package/src/ruvocal/static/huggingchat/favicon-dark.svg +4 -4
  460. package/src/ruvocal/static/huggingchat/favicon-dev.svg +4 -4
  461. package/src/ruvocal/static/huggingchat/favicon.svg +4 -4
  462. package/src/ruvocal/static/huggingchat/fulltext-logo.svg +1 -1
  463. package/src/ruvocal/static/huggingchat/icon.svg +4 -4
  464. package/src/ruvocal/static/huggingchat/logo.svg +4 -4
  465. package/src/ruvocal/static/huggingchat/manifest.json +54 -54
  466. package/src/ruvocal/static/huggingchat/routes.chat.json +226 -226
  467. package/src/ruvocal/static/robots.txt +10 -10
  468. package/src/ruvocal/static/wasm/rvagent_wasm.js +1539 -1539
  469. package/src/ruvocal/stub/@reflink/reflink/package.json +5 -5
  470. package/src/ruvocal/svelte.config.js +53 -53
  471. package/src/ruvocal/tailwind.config.cjs +30 -30
  472. package/src/ruvocal/tsconfig.json +19 -19
  473. package/src/ruvocal/vite.config.ts +87 -87
  474. package/src/scripts/deploy.sh +116 -116
  475. package/src/scripts/generate-config.js +245 -245
  476. package/src/scripts/generate-welcome.js +187 -187
  477. package/src/scripts/package-rvf.sh +116 -116
  478. package/src/ruvocal/.claude-flow/daemon-state.json +0 -135
  479. package/src/ruvocal/.claude-flow/data/pending-insights.jsonl +0 -0
  480. package/src/ruvocal/.claude-flow/data/ranked-context.json +0 -5
  481. package/src/ruvocal/.claude-flow/logs/daemon.log +0 -31
  482. package/src/ruvocal/.claude-flow/logs/headless/audit_1777949411822_juxau0_prompt.log +0 -989
  483. package/src/ruvocal/.claude-flow/logs/headless/audit_1777949411822_juxau0_result.log +0 -67
  484. package/src/ruvocal/.claude-flow/logs/headless/audit_1777950042278_jvj5xq_prompt.log +0 -989
  485. package/src/ruvocal/.claude-flow/logs/headless/audit_1777950042278_jvj5xq_result.log +0 -93
  486. package/src/ruvocal/.claude-flow/logs/headless/optimize_1777949531823_yt5yc2_prompt.log +0 -1498
  487. package/src/ruvocal/.claude-flow/logs/headless/optimize_1777949531823_yt5yc2_result.log +0 -93
  488. package/src/ruvocal/.claude-flow/logs/headless/testgaps_1777949771821_elw1j4_prompt.log +0 -1498
  489. package/src/ruvocal/.claude-flow/logs/headless/testgaps_1777949771821_elw1j4_result.log +0 -100
  490. package/src/ruvocal/.claude-flow/metrics/codebase-map.json +0 -11
  491. package/src/ruvocal/.claude-flow/metrics/consolidation.json +0 -6
  492. package/src/ruvocal/.claude-flow/neural/stats.json +0 -6
  493. package/src/ruvocal/.claude-flow/sessions/current.json +0 -13
  494. package/src/ruvocal/.swarm/attestation.db +0 -0
  495. package/src/ruvocal/.swarm/hnsw.index +0 -0
  496. package/src/ruvocal/.swarm/hnsw.metadata.json +0 -1
  497. package/src/ruvocal/.swarm/memory.db +0 -0
  498. package/src/ruvocal/.swarm/schema.sql +0 -305
@@ -1,260 +1,260 @@
1
- image:
2
- repository: huggingface
3
- name: chat-ui
4
-
5
- #nodeSelector:
6
- # role-huggingchat: "true"
7
- #
8
- #tolerations:
9
- # - key: "huggingface.co/huggingchat"
10
- # operator: "Equal"
11
- # value: "true"
12
- # effect: "NoSchedule"
13
-
14
- serviceAccount:
15
- enabled: true
16
- create: true
17
- name: huggingchat-ephemeral
18
-
19
- ingress:
20
- enabled: false
21
-
22
- ingressInternal:
23
- enabled: true
24
- path: "/chat"
25
- annotations:
26
- external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech"
27
- alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
28
- alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
29
- alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public"
30
- alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public"
31
- alb.ingress.kubernetes.io/ssl-redirect: "443"
32
- alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
33
- alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
34
- alb.ingress.kubernetes.io/target-type: "ip"
35
- alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da"
36
- kubernetes.io/ingress.class: "alb"
37
-
38
- envVars:
39
- TEST: "test"
40
- COUPLE_SESSION_WITH_COOKIE_NAME: "token"
41
- OPENID_SCOPES: "openid profile inference-api read-mcp read-billing"
42
- USE_USER_TOKEN: "true"
43
- MCP_FORWARD_HF_USER_TOKEN: "true"
44
- AUTOMATIC_LOGIN: "false"
45
-
46
- ADDRESS_HEADER: "X-Forwarded-For"
47
- APP_BASE: "/chat"
48
- ALLOW_IFRAME: "false"
49
- COOKIE_SAMESITE: "lax"
50
- COOKIE_SECURE: "true"
51
- EXPOSE_API: "true"
52
- METRICS_ENABLED: "true"
53
- LOG_LEVEL: "debug"
54
- NODE_LOG_STRUCTURED_DATA: "true"
55
-
56
- OPENAI_BASE_URL: "https://router.huggingface.co/v1"
57
- PUBLIC_APP_ASSETS: "huggingchat"
58
- PUBLIC_APP_NAME: "HuggingChat"
59
- PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
60
- PUBLIC_ORIGIN: ""
61
- PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
62
-
63
- TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
64
- LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
65
- LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
66
- LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
67
- LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
68
- LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
69
- LLM_ROUTER_ENABLE_MULTIMODAL: "true"
70
- LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3.5-397B-A17B"
71
- LLM_ROUTER_ENABLE_TOOLS: "true"
72
- LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
73
- TRANSCRIPTION_MODEL: "openai/whisper-large-v3-turbo"
74
- MCP_SERVERS: >
75
- [{"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp?tools=web_search_exa,get_code_context_exa,crawling_exa"}, {"name": "Hugging Face", "url": "https://hf.co/mcp?login"}]
76
- MCP_TOOL_TIMEOUT_MS: "120000"
77
- PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
78
- PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
79
- PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
80
- MODELS: >
81
- [
82
- { "id": "Qwen/Qwen3.5-122B-A10B", "description": "Multimodal MoE excelling at agentic tool use with 1M context and 201 languages." },
83
- { "id": "Qwen/Qwen3.5-35B-A3B", "description": "Compact multimodal MoE with hybrid DeltaNet, 1M context, and 201 languages." },
84
- { "id": "Qwen/Qwen3.5-27B", "description": "Dense multimodal hybrid with top-tier reasoning density and 1M context." },
85
- { "id": "Qwen/Qwen3.5-397B-A17B", "description": "Native multimodal MoE with hybrid attention, 1M context, and 201 languages.", "parameters": { "max_tokens": 32768 } },
86
- { "id": "allenai/Olmo-3.1-32B-Think", "description": "Updated Olmo Think with extended RL for stronger math, code, and instruction following." },
87
- { "id": "MiniMaxAI/MiniMax-M2.5", "description": "Frontier 230B MoE agent for top-tier coding, tool calling, and fast inference." },
88
- { "id": "zai-org/GLM-5", "description": "Flagship 745B MoE for agentic reasoning, coding, and creative writing." },
89
- { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship Qwen3 vision-language MoE for visual agents, documents, and GUI automation." },
90
- { "id": "google/gemma-3n-E4B-it", "description": "Mobile-first multimodal Gemma handling text, images, video, and audio on-device." },
91
- { "id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "description": "Hybrid Mamba-Transformer with 128K context and controllable reasoning budget." },
92
- { "id": "mistralai/Mistral-7B-Instruct-v0.2", "description": "Efficient 7B instruction model with 32K context for dialogue and coding." },
93
- { "id": "Qwen/Qwen3-Coder-Next-FP8", "description": "FP8 Qwen3-Coder-Next for efficient inference with repository-scale coding agents." },
94
- { "id": "arcee-ai/Trinity-Mini", "description": "Compact US-built MoE for multi-turn agents, tool use, and structured outputs." },
95
- { "id": "Qwen/Qwen3-Coder-Next", "description": "Ultra-sparse coding MoE for repository-scale agents with 256K context." },
96
- { "id": "moonshotai/Kimi-K2.5", "description": "Native multimodal agent with agent swarms for parallel tool orchestration." },
97
- { "id": "allenai/Molmo2-8B", "description": "Open vision-language model excelling at video understanding, pointing, and object tracking." },
98
- { "id": "zai-org/GLM-4.7-Flash", "description": "Fast GLM-4.7 variant optimized for lower latency coding and agents." },
99
- { "id": "zai-org/GLM-4.7", "description": "Flagship GLM MoE for coding, reasoning, and agentic tool use." },
100
- { "id": "zai-org/GLM-4.7-FP8", "description": "FP8 GLM-4.7 for efficient inference with strong coding." },
101
- { "id": "MiniMaxAI/MiniMax-M2.1", "description": "MoE agent model with multilingual coding and fast outputs." },
102
- { "id": "XiaomiMiMo/MiMo-V2-Flash", "description": "Fast MoE reasoning model with speculative decoding for agents." },
103
- { "id": "Qwen/Qwen3-VL-32B-Instruct", "description": "Vision-language Qwen for documents, GUI agents, and visual reasoning." },
104
- { "id": "allenai/Olmo-3.1-32B-Instruct", "description": "Fully open chat model strong at tool use and dialogue." },
105
- { "id": "zai-org/AutoGLM-Phone-9B-Multilingual", "description": "Mobile agent for multilingual Android device automation." },
106
- { "id": "utter-project/EuroLLM-22B-Instruct-2512", "description": "European multilingual model for all EU languages and translation." },
107
- { "id": "dicta-il/DictaLM-3.0-24B-Thinking", "description": "Hebrew-English reasoning model with explicit thinking traces for bilingual QA and logic." },
108
- { "id": "EssentialAI/rnj-1-instruct", "description": "8B code and STEM model rivaling larger models on agentic coding, math, and tool use." },
109
- { "id": "MiniMaxAI/MiniMax-M2", "description": "Compact MoE model tuned for fast coding, agentic workflows, and long-context chat." },
110
- { "id": "PrimeIntellect/INTELLECT-3-FP8", "description": "FP8 INTELLECT-3 variant for cheaper frontier-level math, code, and general reasoning." },
111
- { "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "description": "Flagship Qwen3 vision-language model for high-accuracy image, text, and video reasoning." },
112
- { "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "description": "Thinking-mode Qwen3-VL that emits detailed multimodal reasoning traces for difficult problems." },
113
- { "id": "Qwen/Qwen3-VL-8B-Instruct", "description": "Smaller Qwen3 vision-language assistant for everyday multimodal chat, captioning, and analysis." },
114
- { "id": "aisingapore/Qwen-SEA-LION-v4-32B-IT", "description": "SEA-LION v4 Qwen optimized for Southeast Asian languages and regional enterprise workloads." },
115
- { "id": "allenai/Olmo-3-32B-Think", "description": "Fully open 32B thinking model excelling at stepwise math, coding, and research reasoning." },
116
- { "id": "allenai/Olmo-3-7B-Instruct", "description": "Lightweight Olmo assistant for instruction following, Q&A, and everyday open-source workflows." },
117
- { "id": "allenai/Olmo-3-7B-Think", "description": "7B Olmo reasoning model delivering transparent multi-step thinking on modest hardware." },
118
- { "id": "deepcogito/cogito-671b-v2.1", "description": "Frontier-scale 671B MoE focused on deep reasoning, math proofs, and complex coding." },
119
- { "id": "deepcogito/cogito-671b-v2.1-FP8", "description": "FP8 Cogito v2.1 making 671B-scale reasoning more affordable to serve and experiment with." },
120
- { "id": "deepseek-ai/DeepSeek-V3.2", "description": "Latest DeepSeek agent model combining strong reasoning, tool-use, and efficient long-context inference." },
121
- { "id": "moonshotai/Kimi-K2-Thinking", "description": "Reasoning-focused Kimi K2 variant for deep chain-of-thought and large agentic tool flows." },
122
- { "id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2", "description": "NVIDIA Nano 12B general assistant for coding, chat, and agents with efficient deployment." },
123
- { "id": "ServiceNow-AI/Apriel-1.6-15b-Thinker", "description": "15B multimodal reasoning model with efficient thinking for enterprise and coding tasks." },
124
- { "id": "openai/gpt-oss-safeguard-20b", "description": "Safety-focused gpt-oss variant for content classification, policy enforcement, and LLM output filtering." },
125
- { "id": "zai-org/GLM-4.5", "description": "Flagship GLM agent model unifying advanced reasoning, coding, and tool-using capabilities." },
126
- { "id": "zai-org/GLM-4.5V-FP8", "description": "FP8 vision-language GLM-4.5V for efficient multilingual visual QA, understanding, and hybrid reasoning." },
127
- { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
128
- { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
129
- { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
130
- { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
131
- { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
132
- { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
133
- { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
134
- { "id": "zai-org/GLM-4.6V", "description": "106B vision-language model with 128K context and native tool calling for multimodal agents.", "parameters": { "max_tokens": 8192 } },
135
- { "id": "zai-org/GLM-4.6V-Flash", "description": "9B lightweight vision model for fast local inference with tool calling and UI understanding." },
136
- { "id": "zai-org/GLM-4.6V-FP8", "description": "FP8-quantized GLM-4.6V for efficient multimodal deployment with native tool use." },
137
- { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
138
- { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
139
- { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
140
- { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
141
- { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
142
- { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
143
- { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
144
- { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
145
- { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
146
- { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
147
- { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
148
- { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
149
- { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
150
- { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
151
- { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
152
- { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
153
- { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
154
- { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
155
- { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
156
- { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
157
- { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
158
- { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
159
- { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
160
- { "id": "zai-org/GLM-4.6", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
161
- { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
162
- { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
163
- { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
164
- { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
165
- { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
166
- { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
167
- { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
168
- { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
169
- { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
170
- { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
171
- { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
172
- { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
173
- { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
174
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
175
- { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
176
- { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
177
- { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
178
- { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
179
- { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
180
- { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
181
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
182
- { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
183
- { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
184
- { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
185
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
186
- { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
187
- { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
188
- { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
189
- { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
190
- { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
191
- { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
192
- { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
193
- { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
194
- { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
195
- { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
196
- { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
197
- { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
198
- { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
199
- { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
200
- { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
201
- { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
202
- { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
203
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
204
- { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
205
- { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
206
- { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
207
- { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
208
- { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
209
- { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
210
- { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
211
- { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
212
- { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
213
- { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
214
- { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
215
- { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
216
- { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
217
- { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
218
- { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
219
- { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
220
- { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
221
- { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
222
- { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
223
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
224
- { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
225
- { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
226
- { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
227
- { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
228
- { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
229
- { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
230
- { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
231
- { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
232
- { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
233
- { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
234
- { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
235
- { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
236
- { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
237
- { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
238
- { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
239
- { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
240
- { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
241
- { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
242
- { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
243
- { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
244
- ]
245
-
246
- infisical:
247
- enabled: true
248
- env: "ephemeral-us-east-1"
249
-
250
- replicas: 1
251
- autoscaling:
252
- enabled: false
253
-
254
- resources:
255
- requests:
256
- cpu: 2
257
- memory: 4Gi
258
- limits:
259
- cpu: 4
260
- memory: 8Gi
1
+ image:
2
+ repository: huggingface
3
+ name: chat-ui
4
+
5
+ #nodeSelector:
6
+ # role-huggingchat: "true"
7
+ #
8
+ #tolerations:
9
+ # - key: "huggingface.co/huggingchat"
10
+ # operator: "Equal"
11
+ # value: "true"
12
+ # effect: "NoSchedule"
13
+
14
+ serviceAccount:
15
+ enabled: true
16
+ create: true
17
+ name: huggingchat-ephemeral
18
+
19
+ ingress:
20
+ enabled: false
21
+
22
+ ingressInternal:
23
+ enabled: true
24
+ path: "/chat"
25
+ annotations:
26
+ external-dns.alpha.kubernetes.io/hostname: "*.chat-dev.huggingface.tech"
27
+ alb.ingress.kubernetes.io/healthcheck-path: "/chat/healthcheck"
28
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
29
+ alb.ingress.kubernetes.io/group.name: "chat-dev-internal-public"
30
+ alb.ingress.kubernetes.io/load-balancer-name: "chat-dev-internal-public"
31
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
32
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
33
+ alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
34
+ alb.ingress.kubernetes.io/target-type: "ip"
35
+ alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:us-east-1:707930574880:certificate/bc3eb446-1c04-432c-ac6b-946a88d725da"
36
+ kubernetes.io/ingress.class: "alb"
37
+
38
+ envVars:
39
+ TEST: "test"
40
+ COUPLE_SESSION_WITH_COOKIE_NAME: "token"
41
+ OPENID_SCOPES: "openid profile inference-api read-mcp read-billing"
42
+ USE_USER_TOKEN: "true"
43
+ MCP_FORWARD_HF_USER_TOKEN: "true"
44
+ AUTOMATIC_LOGIN: "false"
45
+
46
+ ADDRESS_HEADER: "X-Forwarded-For"
47
+ APP_BASE: "/chat"
48
+ ALLOW_IFRAME: "false"
49
+ COOKIE_SAMESITE: "lax"
50
+ COOKIE_SECURE: "true"
51
+ EXPOSE_API: "true"
52
+ METRICS_ENABLED: "true"
53
+ LOG_LEVEL: "debug"
54
+ NODE_LOG_STRUCTURED_DATA: "true"
55
+
56
+ OPENAI_BASE_URL: "https://router.huggingface.co/v1"
57
+ PUBLIC_APP_ASSETS: "huggingchat"
58
+ PUBLIC_APP_NAME: "HuggingChat"
59
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone"
60
+ PUBLIC_ORIGIN: ""
61
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "https://plausible.io/js/pa-Io_oigECawqdlgpf5qvHb.js"
62
+
63
+ TASK_MODEL: "Qwen/Qwen3-4B-Instruct-2507"
64
+ LLM_ROUTER_ARCH_BASE_URL: "https://router.huggingface.co/v1"
65
+ LLM_ROUTER_ROUTES_PATH: "build/client/chat/huggingchat/routes.chat.json"
66
+ LLM_ROUTER_ARCH_MODEL: "katanemo/Arch-Router-1.5B"
67
+ LLM_ROUTER_OTHER_ROUTE: "casual_conversation"
68
+ LLM_ROUTER_ARCH_TIMEOUT_MS: "10000"
69
+ LLM_ROUTER_ENABLE_MULTIMODAL: "true"
70
+ LLM_ROUTER_MULTIMODAL_MODEL: "Qwen/Qwen3.5-397B-A17B"
71
+ LLM_ROUTER_ENABLE_TOOLS: "true"
72
+ LLM_ROUTER_TOOLS_MODEL: "moonshotai/Kimi-K2-Instruct-0905"
73
+ TRANSCRIPTION_MODEL: "openai/whisper-large-v3-turbo"
74
+ MCP_SERVERS: >
75
+ [{"name": "Web Search (Exa)", "url": "https://mcp.exa.ai/mcp?tools=web_search_exa,get_code_context_exa,crawling_exa"}, {"name": "Hugging Face", "url": "https://hf.co/mcp?login"}]
76
+ MCP_TOOL_TIMEOUT_MS: "120000"
77
+ PUBLIC_LLM_ROUTER_DISPLAY_NAME: "Omni"
78
+ PUBLIC_LLM_ROUTER_LOGO_URL: "https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/C5V0v1xZXv6M7FXsdJH9b.png"
79
+ PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
80
+ MODELS: >
81
+ [
82
+ { "id": "Qwen/Qwen3.5-122B-A10B", "description": "Multimodal MoE excelling at agentic tool use with 1M context and 201 languages." },
83
+ { "id": "Qwen/Qwen3.5-35B-A3B", "description": "Compact multimodal MoE with hybrid DeltaNet, 1M context, and 201 languages." },
84
+ { "id": "Qwen/Qwen3.5-27B", "description": "Dense multimodal hybrid with top-tier reasoning density and 1M context." },
85
+ { "id": "Qwen/Qwen3.5-397B-A17B", "description": "Native multimodal MoE with hybrid attention, 1M context, and 201 languages.", "parameters": { "max_tokens": 32768 } },
86
+ { "id": "allenai/Olmo-3.1-32B-Think", "description": "Updated Olmo Think with extended RL for stronger math, code, and instruction following." },
87
+ { "id": "MiniMaxAI/MiniMax-M2.5", "description": "Frontier 230B MoE agent for top-tier coding, tool calling, and fast inference." },
88
+ { "id": "zai-org/GLM-5", "description": "Flagship 745B MoE for agentic reasoning, coding, and creative writing." },
89
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "description": "Flagship Qwen3 vision-language MoE for visual agents, documents, and GUI automation." },
90
+ { "id": "google/gemma-3n-E4B-it", "description": "Mobile-first multimodal Gemma handling text, images, video, and audio on-device." },
91
+ { "id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "description": "Hybrid Mamba-Transformer with 128K context and controllable reasoning budget." },
92
+ { "id": "mistralai/Mistral-7B-Instruct-v0.2", "description": "Efficient 7B instruction model with 32K context for dialogue and coding." },
93
+ { "id": "Qwen/Qwen3-Coder-Next-FP8", "description": "FP8 Qwen3-Coder-Next for efficient inference with repository-scale coding agents." },
94
+ { "id": "arcee-ai/Trinity-Mini", "description": "Compact US-built MoE for multi-turn agents, tool use, and structured outputs." },
95
+ { "id": "Qwen/Qwen3-Coder-Next", "description": "Ultra-sparse coding MoE for repository-scale agents with 256K context." },
96
+ { "id": "moonshotai/Kimi-K2.5", "description": "Native multimodal agent with agent swarms for parallel tool orchestration." },
97
+ { "id": "allenai/Molmo2-8B", "description": "Open vision-language model excelling at video understanding, pointing, and object tracking." },
98
+ { "id": "zai-org/GLM-4.7-Flash", "description": "Fast GLM-4.7 variant optimized for lower latency coding and agents." },
99
+ { "id": "zai-org/GLM-4.7", "description": "Flagship GLM MoE for coding, reasoning, and agentic tool use." },
100
+ { "id": "zai-org/GLM-4.7-FP8", "description": "FP8 GLM-4.7 for efficient inference with strong coding." },
101
+ { "id": "MiniMaxAI/MiniMax-M2.1", "description": "MoE agent model with multilingual coding and fast outputs." },
102
+ { "id": "XiaomiMiMo/MiMo-V2-Flash", "description": "Fast MoE reasoning model with speculative decoding for agents." },
103
+ { "id": "Qwen/Qwen3-VL-32B-Instruct", "description": "Vision-language Qwen for documents, GUI agents, and visual reasoning." },
104
+ { "id": "allenai/Olmo-3.1-32B-Instruct", "description": "Fully open chat model strong at tool use and dialogue." },
105
+ { "id": "zai-org/AutoGLM-Phone-9B-Multilingual", "description": "Mobile agent for multilingual Android device automation." },
106
+ { "id": "utter-project/EuroLLM-22B-Instruct-2512", "description": "European multilingual model for all EU languages and translation." },
107
+ { "id": "dicta-il/DictaLM-3.0-24B-Thinking", "description": "Hebrew-English reasoning model with explicit thinking traces for bilingual QA and logic." },
108
+ { "id": "EssentialAI/rnj-1-instruct", "description": "8B code and STEM model rivaling larger models on agentic coding, math, and tool use." },
109
+ { "id": "MiniMaxAI/MiniMax-M2", "description": "Compact MoE model tuned for fast coding, agentic workflows, and long-context chat." },
110
+ { "id": "PrimeIntellect/INTELLECT-3-FP8", "description": "FP8 INTELLECT-3 variant for cheaper frontier-level math, code, and general reasoning." },
111
+ { "id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "description": "Flagship Qwen3 vision-language model for high-accuracy image, text, and video reasoning." },
112
+ { "id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "description": "Thinking-mode Qwen3-VL that emits detailed multimodal reasoning traces for difficult problems." },
113
+ { "id": "Qwen/Qwen3-VL-8B-Instruct", "description": "Smaller Qwen3 vision-language assistant for everyday multimodal chat, captioning, and analysis." },
114
+ { "id": "aisingapore/Qwen-SEA-LION-v4-32B-IT", "description": "SEA-LION v4 Qwen optimized for Southeast Asian languages and regional enterprise workloads." },
115
+ { "id": "allenai/Olmo-3-32B-Think", "description": "Fully open 32B thinking model excelling at stepwise math, coding, and research reasoning." },
116
+ { "id": "allenai/Olmo-3-7B-Instruct", "description": "Lightweight Olmo assistant for instruction following, Q&A, and everyday open-source workflows." },
117
+ { "id": "allenai/Olmo-3-7B-Think", "description": "7B Olmo reasoning model delivering transparent multi-step thinking on modest hardware." },
118
+ { "id": "deepcogito/cogito-671b-v2.1", "description": "Frontier-scale 671B MoE focused on deep reasoning, math proofs, and complex coding." },
119
+ { "id": "deepcogito/cogito-671b-v2.1-FP8", "description": "FP8 Cogito v2.1 making 671B-scale reasoning more affordable to serve and experiment with." },
120
+ { "id": "deepseek-ai/DeepSeek-V3.2", "description": "Latest DeepSeek agent model combining strong reasoning, tool-use, and efficient long-context inference." },
121
+ { "id": "moonshotai/Kimi-K2-Thinking", "description": "Reasoning-focused Kimi K2 variant for deep chain-of-thought and large agentic tool flows." },
122
+ { "id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2", "description": "NVIDIA Nano 12B general assistant for coding, chat, and agents with efficient deployment." },
123
+ { "id": "ServiceNow-AI/Apriel-1.6-15b-Thinker", "description": "15B multimodal reasoning model with efficient thinking for enterprise and coding tasks." },
124
+ { "id": "openai/gpt-oss-safeguard-20b", "description": "Safety-focused gpt-oss variant for content classification, policy enforcement, and LLM output filtering." },
125
+ { "id": "zai-org/GLM-4.5", "description": "Flagship GLM agent model unifying advanced reasoning, coding, and tool-using capabilities." },
126
+ { "id": "zai-org/GLM-4.5V-FP8", "description": "FP8 vision-language GLM-4.5V for efficient multilingual visual QA, understanding, and hybrid reasoning." },
127
+ { "id": "deepseek-ai/DeepSeek-V3.2-Exp", "description": "Experimental V3.2 release focused on faster, lower-cost inference with strong general reasoning and tool use." },
128
+ { "id": "zai-org/GLM-4.6", "description": "Next-gen GLM with very long context and solid multilingual reasoning; good for agents and tools." },
129
+ { "id": "Kwaipilot/KAT-Dev", "description": "Developer-oriented assistant tuned for coding, debugging, and lightweight agent workflows." },
130
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Flagship multimodal Qwen (text+image) instruction model for high-accuracy visual reasoning and detailed explanations." },
131
+ { "id": "deepseek-ai/DeepSeek-V3.1-Terminus", "description": "Refined V3.1 variant optimized for reliability on long contexts, structured outputs, and tool use." },
132
+ { "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "description": "Deliberative multimodal Qwen that can produce step-wise visual+text reasoning traces for complex tasks." },
133
+ { "id": "zai-org/GLM-4.6-FP8", "description": "FP8-optimized GLM-4.6 for faster/cheaper deployment with near-parity quality on most tasks." },
134
+ { "id": "zai-org/GLM-4.6V", "description": "106B vision-language model with 128K context and native tool calling for multimodal agents.", "parameters": { "max_tokens": 8192 } },
135
+ { "id": "zai-org/GLM-4.6V-Flash", "description": "9B lightweight vision model for fast local inference with tool calling and UI understanding." },
136
+ { "id": "zai-org/GLM-4.6V-FP8", "description": "FP8-quantized GLM-4.6V for efficient multimodal deployment with native tool use." },
137
+ { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "description": "Deliberative text-only 235B Qwen variant for transparent, step-by-step reasoning on hard problems." },
138
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "description": "Instruction tuned Qwen for multilingual reasoning, coding, long contexts." },
139
+ { "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "description": "Thinking mode Qwen that outputs explicit step by step reasoning." },
140
+ { "id": "moonshotai/Kimi-K2-Instruct-0905", "description": "Instruction MoE strong coding and multi step reasoning, long context." },
141
+ { "id": "openai/gpt-oss-20b", "description": "Efficient open model for reasoning and tool use, runs locally." },
142
+ { "id": "swiss-ai/Apertus-8B-Instruct-2509", "description": "Open, multilingual, trained on compliant data transparent global assistant." },
143
+ { "id": "openai/gpt-oss-120b", "description": "High performing open model suitable for large scale applications." },
144
+ { "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "description": "Code specialized Qwen long context strong generation and function calling." },
145
+ { "id": "meta-llama/Llama-3.1-8B-Instruct", "description": "Instruction tuned Llama efficient conversational assistant with improved alignment." },
146
+ { "id": "Qwen/Qwen2.5-VL-7B-Instruct", "description": "Vision language Qwen handles images and text for basic multimodal tasks." },
147
+ { "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "description": "Instruction tuned Qwen reliable general tasks with long context support." },
148
+ { "id": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "description": "Baidu multimodal MoE strong at complex vision language reasoning." },
149
+ { "id": "baidu/ERNIE-4.5-0.3B-PT", "description": "Tiny efficient Baidu model surprisingly long context for lightweight chat." },
150
+ { "id": "deepseek-ai/DeepSeek-R1", "description": "MoE reasoning model excels at math, logic, coding with steps." },
151
+ { "id": "baidu/ERNIE-4.5-21B-A3B-PT", "description": "Efficient Baidu MoE competitive generation with fewer active parameters." },
152
+ { "id": "swiss-ai/Apertus-70B-Instruct-2509", "description": "Open multilingual model trained on open data transparent and capable." },
153
+ { "id": "Qwen/Qwen3-4B-Instruct-2507", "description": "Compact instruction Qwen great for lightweight assistants and apps." },
154
+ { "id": "meta-llama/Llama-3.2-3B-Instruct", "description": "Small efficient Llama for basic conversations and instructions." },
155
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "description": "Huge Qwen coder repository scale understanding and advanced generation." },
156
+ { "id": "meta-llama/Meta-Llama-3-8B-Instruct", "description": "Aligned, efficient Llama dependable open source assistant tasks." },
157
+ { "id": "Qwen/Qwen3-4B-Thinking-2507", "description": "Small Qwen that emits transparent step by step reasoning." },
158
+ { "id": "moonshotai/Kimi-K2-Instruct", "description": "MoE assistant strong coding, reasoning, agentic tasks, long context." },
159
+ { "id": "zai-org/GLM-4.5V", "description": "Vision language MoE state of the art multimodal reasoning." },
160
+ { "id": "zai-org/GLM-4.6", "description": "Hybrid reasoning model top choice for intelligent agent applications." },
161
+ { "id": "deepseek-ai/DeepSeek-V3.1", "description": "Supports direct and thinking style reasoning within one model." },
162
+ { "id": "Qwen/Qwen3-8B", "description": "Efficient Qwen assistant strong multilingual skills and formatting." },
163
+ { "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "description": "Thinking mode Qwen explicit reasoning for complex interpretable tasks." },
164
+ { "id": "google/gemma-3-27b-it", "description": "Multimodal Gemma long context strong text and image understanding." },
165
+ { "id": "zai-org/GLM-4.5-Air", "description": "Efficient GLM strong reasoning and tool use at lower cost." },
166
+ { "id": "HuggingFaceTB/SmolLM3-3B", "description": "Small multilingual long context model surprisingly strong reasoning." },
167
+ { "id": "Qwen/Qwen3-30B-A3B", "description": "Qwen base model for general use or further fine tuning." },
168
+ { "id": "Qwen/Qwen2.5-7B-Instruct", "description": "Compact instruction model solid for basic conversation and tasks." },
169
+ { "id": "Qwen/Qwen3-32B", "description": "General purpose Qwen strong for complex queries and dialogues." },
170
+ { "id": "Qwen/QwQ-32B", "description": "Preview Qwen showcasing next generation features and alignment." },
171
+ { "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "description": "Flagship instruction Qwen near state of the art across domains." },
172
+ { "id": "meta-llama/Llama-3.3-70B-Instruct", "description": "Improved Llama alignment and structure powerful complex conversations." },
173
+ { "id": "Qwen/Qwen2.5-VL-32B-Instruct", "description": "Multimodal Qwen advanced visual reasoning for complex image plus text." },
174
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "description": "Tiny distilled Qwen stepwise math and logic reasoning." },
175
+ { "id": "Qwen/Qwen3-235B-A22B", "description": "Qwen base at flagship scale ideal for custom fine tuning." },
176
+ { "id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "description": "Processes text and images excels at summarization and cross modal reasoning." },
177
+ { "id": "NousResearch/Hermes-4-70B", "description": "Steerable assistant strong reasoning and creativity highly helpful." },
178
+ { "id": "Qwen/Qwen2.5-Coder-32B-Instruct", "description": "Code model strong generation and tool use bridges sizes." },
179
+ { "id": "katanemo/Arch-Router-1.5B", "description": "Lightweight router model directs queries to specialized backends." },
180
+ { "id": "meta-llama/Llama-3.2-1B-Instruct", "description": "Ultra small Llama handles basic Q and A and instructions." },
181
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "description": "Distilled Qwen excels at stepwise logic in compact footprint." },
182
+ { "id": "deepseek-ai/DeepSeek-V3", "description": "General language model direct answers strong creative and knowledge tasks." },
183
+ { "id": "deepseek-ai/DeepSeek-V3-0324", "description": "Updated V3 better reasoning and coding strong tool use." },
184
+ { "id": "CohereLabs/command-a-translate-08-2025", "description": "Translation focused Command model high quality multilingual translation." },
185
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "description": "Distilled from R1 strong reasoning standout dense model." },
186
+ { "id": "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT", "description": "Multimodal base text image pretraining for cross modal understanding." },
187
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "description": "MoE multimodal Llama rivals top vision language models." },
188
+ { "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "description": "Quantized giant coder faster lighter retains advanced code generation." },
189
+ { "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", "description": "Qwen3 variant with R1 reasoning improvements compact and capable." },
190
+ { "id": "deepseek-ai/DeepSeek-R1-0528", "description": "R1 update improved reasoning, fewer hallucinations, adds function calling.", "parameters": { "max_tokens": 32000 } },
191
+ { "id": "Qwen/Qwen3-14B", "description": "Balanced Qwen good performance and efficiency for assistants." },
192
+ { "id": "MiniMaxAI/MiniMax-M1-80k", "description": "Long context MoE very fast excels at long range reasoning and code." },
193
+ { "id": "Qwen/Qwen2.5-Coder-7B-Instruct", "description": "Efficient coding assistant for lightweight programming tasks." },
194
+ { "id": "aisingapore/Gemma-SEA-LION-v4-27B-IT", "description": "Gemma SEA LION optimized for Southeast Asian languages or enterprise." },
195
+ { "id": "CohereLabs/aya-expanse-8b", "description": "Small Aya Expanse broad knowledge and efficient general reasoning." },
196
+ { "id": "baichuan-inc/Baichuan-M2-32B", "description": "Medical reasoning specialist fine tuned for clinical QA bilingual." },
197
+ { "id": "Qwen/Qwen2.5-VL-72B-Instruct", "description": "Vision language Qwen detailed image interpretation and instructions." },
198
+ { "id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "description": "FP8 Maverick efficient deployment retains top multimodal capability." },
199
+ { "id": "zai-org/GLM-4.1V-9B-Thinking", "description": "Vision language with explicit reasoning strong for its size." },
200
+ { "id": "zai-org/GLM-4.5-Air-FP8", "description": "FP8 efficient GLM Air hybrid reasoning with minimal compute." },
201
+ { "id": "google/gemma-2-2b-it", "description": "Small Gemma instruction tuned safe responsible outputs easy deployment." },
202
+ { "id": "arcee-ai/AFM-4.5B", "description": "Enterprise focused model strong CPU performance compliant and practical." },
203
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "description": "Llama distilled from R1 strong reasoning and structured outputs." },
204
+ { "id": "CohereLabs/aya-vision-8b", "description": "Vision capable Aya handles images and text for basic multimodal." },
205
+ { "id": "NousResearch/Hermes-3-Llama-3.1-405B", "description": "Highly aligned assistant excels at math, code, QA." },
206
+ { "id": "Qwen/Qwen2.5-72B-Instruct", "description": "Accurate detailed instruction model supports tools and long contexts." },
207
+ { "id": "meta-llama/Llama-Guard-4-12B", "description": "Safety guardrail model filters and enforces content policies." },
208
+ { "id": "CohereLabs/command-a-vision-07-2025", "description": "Command model with image input captioning and visual QA." },
209
+ { "id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "description": "NVIDIA tuned Llama optimized throughput for research and production." },
210
+ { "id": "meta-llama/Meta-Llama-3-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and reliability over predecessors." },
211
+ { "id": "NousResearch/Hermes-4-405B", "description": "Frontier Hermes hybrid reasoning excels at math, code, creativity." },
212
+ { "id": "NousResearch/Hermes-2-Pro-Llama-3-8B", "description": "Small Hermes highly steerable maximized helpfulness for basics." },
213
+ { "id": "google/gemma-2-9b-it", "description": "Gemma with improved accuracy and context safe, easy to deploy." },
214
+ { "id": "Sao10K/L3-8B-Stheno-v3.2", "description": "Community Llama variant themed tuning and unique conversational style." },
215
+ { "id": "deepcogito/cogito-v2-preview-llama-109B-MoE", "description": "MoE preview advanced reasoning tests DeepCogito v2 fine tuning." },
216
+ { "id": "CohereLabs/c4ai-command-r-08-2024", "description": "Cohere Command variant instruction following with specialized tuning." },
217
+ { "id": "baidu/ERNIE-4.5-300B-A47B-Base-PT", "description": "Large base model foundation for specialized language systems." },
218
+ { "id": "CohereLabs/aya-expanse-32b", "description": "Aya Expanse large comprehensive knowledge and reasoning capabilities." },
219
+ { "id": "CohereLabs/c4ai-command-a-03-2025", "description": "Updated Command assistant improved accuracy and general usefulness." },
220
+ { "id": "CohereLabs/command-a-reasoning-08-2025", "description": "Command variant optimized for complex multi step logical reasoning." },
221
+ { "id": "alpindale/WizardLM-2-8x22B", "description": "Multi expert WizardLM MoE approach for efficient high quality generation." },
222
+ { "id": "tokyotech-llm/Llama-3.3-Swallow-70B-Instruct-v0.4", "description": "Academic fine tune potential multilingual and domain improvements." },
223
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "description": "Llama distilled from R1 improved reasoning enterprise friendly." },
224
+ { "id": "CohereLabs/c4ai-command-r7b-12-2024", "description": "Small Command variant research or regional adaptation focus." },
225
+ { "id": "Sao10K/L3-70B-Euryale-v2.1", "description": "Creative community instruct model with distinctive persona." },
226
+ { "id": "CohereLabs/aya-vision-32b", "description": "Larger Aya Vision advanced vision language with detailed reasoning." },
227
+ { "id": "meta-llama/Llama-3.1-405B-Instruct", "description": "Massive instruction model very long context excels at complex tasks." },
228
+ { "id": "CohereLabs/c4ai-command-r7b-arabic-02-2025", "description": "Command tuned for Arabic fluent and culturally appropriate outputs." },
229
+ { "id": "Sao10K/L3-8B-Lunaris-v1", "description": "Community Llama creative role play oriented themed persona." },
230
+ { "id": "Qwen/Qwen2.5-Coder-7B", "description": "Small Qwen coder basic programming assistance for low resource environments." },
231
+ { "id": "Qwen/QwQ-32B-Preview", "description": "Preview Qwen experimental features and architecture refinements." },
232
+ { "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "description": "Distilled Qwen mid size strong reasoning and clear steps." },
233
+ { "id": "meta-llama/Llama-3.1-70B-Instruct", "description": "Instruction tuned Llama improved reasoning and factual reliability." },
234
+ { "id": "Qwen/Qwen3-235B-A22B-FP8", "description": "FP8 quantized Qwen flagship efficient access to ultra large capabilities." },
235
+ { "id": "zai-org/GLM-4-32B-0414", "description": "Open licensed GLM matches larger proprietary models on benchmarks." },
236
+ { "id": "SentientAGI/Dobby-Unhinged-Llama-3.3-70B", "description": "Unfiltered candid creative outputs intentionally less restricted behavior." },
237
+ { "id": "marin-community/marin-8b-instruct", "description": "Community tuned assistant helpful conversational everyday tasks." },
238
+ { "id": "deepseek-ai/DeepSeek-Prover-V2-671B", "description": "Specialist for mathematical proofs and formal reasoning workflows." },
239
+ { "id": "NousResearch/Hermes-3-Llama-3.1-70B", "description": "Highly aligned assistant strong complex instruction following." },
240
+ { "id": "Qwen/Qwen2.5-Coder-3B-Instruct", "description": "Tiny coding assistant basic code completions and explanations." },
241
+ { "id": "deepcogito/cogito-v2-preview-llama-70B", "description": "Preview fine tune enhanced reasoning and tool use indications." },
242
+ { "id": "deepcogito/cogito-v2-preview-llama-405B", "description": "Preview at frontier scale tests advanced fine tuning methods." },
243
+ { "id": "deepcogito/cogito-v2-preview-deepseek-671B-MoE", "description": "Experimental blend of DeepCogito and DeepSeek approaches for reasoning." }
244
+ ]
245
+
246
+ infisical:
247
+ enabled: true
248
+ env: "ephemeral-us-east-1"
249
+
250
+ replicas: 1
251
+ autoscaling:
252
+ enabled: false
253
+
254
+ resources:
255
+ requests:
256
+ cpu: 2
257
+ memory: 4Gi
258
+ limits:
259
+ cpu: 4
260
+ memory: 8Gi