amd-gaia 0.14.1 (amd_gaia-0.14.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (800)
  1. amd_gaia-0.14.1.dist-info/METADATA +768 -0
  2. amd_gaia-0.14.1.dist-info/RECORD +800 -0
  3. amd_gaia-0.14.1.dist-info/WHEEL +5 -0
  4. amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
  5. amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
  6. amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
  7. gaia/__init__.py +2 -0
  8. gaia/agents/__init__.py +19 -0
  9. gaia/agents/base/__init__.py +9 -0
  10. gaia/agents/base/agent.py +2072 -0
  11. gaia/agents/base/api_agent.py +120 -0
  12. gaia/agents/base/console.py +1457 -0
  13. gaia/agents/base/mcp_agent.py +86 -0
  14. gaia/agents/base/tools.py +83 -0
  15. gaia/agents/blender/agent.py +556 -0
  16. gaia/agents/blender/agent_simple.py +135 -0
  17. gaia/agents/blender/app.py +211 -0
  18. gaia/agents/blender/app_simple.py +41 -0
  19. gaia/agents/blender/core/__init__.py +16 -0
  20. gaia/agents/blender/core/materials.py +506 -0
  21. gaia/agents/blender/core/objects.py +316 -0
  22. gaia/agents/blender/core/rendering.py +225 -0
  23. gaia/agents/blender/core/scene.py +220 -0
  24. gaia/agents/blender/core/view.py +146 -0
  25. gaia/agents/chat/__init__.py +9 -0
  26. gaia/agents/chat/agent.py +975 -0
  27. gaia/agents/chat/app.py +1058 -0
  28. gaia/agents/chat/session.py +508 -0
  29. gaia/agents/chat/tools/__init__.py +15 -0
  30. gaia/agents/chat/tools/file_tools.py +96 -0
  31. gaia/agents/chat/tools/rag_tools.py +1729 -0
  32. gaia/agents/chat/tools/shell_tools.py +436 -0
  33. gaia/agents/code/__init__.py +7 -0
  34. gaia/agents/code/agent.py +547 -0
  35. gaia/agents/code/app.py +266 -0
  36. gaia/agents/code/models.py +135 -0
  37. gaia/agents/code/orchestration/__init__.py +24 -0
  38. gaia/agents/code/orchestration/checklist_executor.py +1739 -0
  39. gaia/agents/code/orchestration/checklist_generator.py +709 -0
  40. gaia/agents/code/orchestration/factories/__init__.py +9 -0
  41. gaia/agents/code/orchestration/factories/base.py +63 -0
  42. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
  43. gaia/agents/code/orchestration/factories/python_factory.py +106 -0
  44. gaia/agents/code/orchestration/orchestrator.py +610 -0
  45. gaia/agents/code/orchestration/project_analyzer.py +391 -0
  46. gaia/agents/code/orchestration/steps/__init__.py +67 -0
  47. gaia/agents/code/orchestration/steps/base.py +188 -0
  48. gaia/agents/code/orchestration/steps/error_handler.py +314 -0
  49. gaia/agents/code/orchestration/steps/nextjs.py +828 -0
  50. gaia/agents/code/orchestration/steps/python.py +307 -0
  51. gaia/agents/code/orchestration/template_catalog.py +463 -0
  52. gaia/agents/code/orchestration/workflows/__init__.py +14 -0
  53. gaia/agents/code/orchestration/workflows/base.py +80 -0
  54. gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
  55. gaia/agents/code/orchestration/workflows/python.py +94 -0
  56. gaia/agents/code/prompts/__init__.py +11 -0
  57. gaia/agents/code/prompts/base_prompt.py +77 -0
  58. gaia/agents/code/prompts/code_patterns.py +1925 -0
  59. gaia/agents/code/prompts/nextjs_prompt.py +40 -0
  60. gaia/agents/code/prompts/python_prompt.py +109 -0
  61. gaia/agents/code/schema_inference.py +365 -0
  62. gaia/agents/code/system_prompt.py +41 -0
  63. gaia/agents/code/tools/__init__.py +42 -0
  64. gaia/agents/code/tools/cli_tools.py +1138 -0
  65. gaia/agents/code/tools/code_formatting.py +319 -0
  66. gaia/agents/code/tools/code_tools.py +769 -0
  67. gaia/agents/code/tools/error_fixing.py +1347 -0
  68. gaia/agents/code/tools/external_tools.py +180 -0
  69. gaia/agents/code/tools/file_io.py +845 -0
  70. gaia/agents/code/tools/prisma_tools.py +190 -0
  71. gaia/agents/code/tools/project_management.py +1016 -0
  72. gaia/agents/code/tools/testing.py +321 -0
  73. gaia/agents/code/tools/typescript_tools.py +122 -0
  74. gaia/agents/code/tools/validation_parsing.py +461 -0
  75. gaia/agents/code/tools/validation_tools.py +803 -0
  76. gaia/agents/code/tools/web_dev_tools.py +1744 -0
  77. gaia/agents/code/validators/__init__.py +16 -0
  78. gaia/agents/code/validators/antipattern_checker.py +241 -0
  79. gaia/agents/code/validators/ast_analyzer.py +197 -0
  80. gaia/agents/code/validators/requirements_validator.py +145 -0
  81. gaia/agents/code/validators/syntax_validator.py +171 -0
  82. gaia/agents/docker/__init__.py +7 -0
  83. gaia/agents/docker/agent.py +642 -0
  84. gaia/agents/jira/__init__.py +11 -0
  85. gaia/agents/jira/agent.py +894 -0
  86. gaia/agents/jira/jql_templates.py +299 -0
  87. gaia/agents/routing/__init__.py +7 -0
  88. gaia/agents/routing/agent.py +512 -0
  89. gaia/agents/routing/system_prompt.py +75 -0
  90. gaia/api/__init__.py +23 -0
  91. gaia/api/agent_registry.py +238 -0
  92. gaia/api/app.py +305 -0
  93. gaia/api/openai_server.py +575 -0
  94. gaia/api/schemas.py +186 -0
  95. gaia/api/sse_handler.py +370 -0
  96. gaia/apps/__init__.py +4 -0
  97. gaia/apps/llm/__init__.py +6 -0
  98. gaia/apps/llm/app.py +169 -0
  99. gaia/apps/summarize/app.py +633 -0
  100. gaia/apps/summarize/html_viewer.py +133 -0
  101. gaia/apps/summarize/pdf_formatter.py +284 -0
  102. gaia/audio/__init__.py +2 -0
  103. gaia/audio/audio_client.py +439 -0
  104. gaia/audio/audio_recorder.py +269 -0
  105. gaia/audio/kokoro_tts.py +599 -0
  106. gaia/audio/whisper_asr.py +432 -0
  107. gaia/chat/__init__.py +16 -0
  108. gaia/chat/app.py +430 -0
  109. gaia/chat/prompts.py +522 -0
  110. gaia/chat/sdk.py +1200 -0
  111. gaia/cli.py +5621 -0
  112. gaia/eval/batch_experiment.py +2332 -0
  113. gaia/eval/claude.py +542 -0
  114. gaia/eval/config.py +37 -0
  115. gaia/eval/email_generator.py +512 -0
  116. gaia/eval/eval.py +3179 -0
  117. gaia/eval/groundtruth.py +1130 -0
  118. gaia/eval/transcript_generator.py +582 -0
  119. gaia/eval/webapp/README.md +168 -0
  120. gaia/eval/webapp/node_modules/.bin/mime +16 -0
  121. gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
  122. gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
  123. gaia/eval/webapp/node_modules/.package-lock.json +865 -0
  124. gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
  125. gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
  126. gaia/eval/webapp/node_modules/accepts/README.md +140 -0
  127. gaia/eval/webapp/node_modules/accepts/index.js +238 -0
  128. gaia/eval/webapp/node_modules/accepts/package.json +47 -0
  129. gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
  130. gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
  131. gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
  132. gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
  133. gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
  134. gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
  135. gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
  136. gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
  137. gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
  138. gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
  139. gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
  140. gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
  141. gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
  142. gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
  143. gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
  144. gaia/eval/webapp/node_modules/bytes/History.md +97 -0
  145. gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
  146. gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
  147. gaia/eval/webapp/node_modules/bytes/index.js +170 -0
  148. gaia/eval/webapp/node_modules/bytes/package.json +42 -0
  149. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
  150. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
  151. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
  152. gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
  153. gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
  154. gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
  155. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
  156. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
  157. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
  158. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
  159. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
  160. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
  161. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
  162. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
  163. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
  164. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
  165. gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
  166. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
  167. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
  168. gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
  169. gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
  170. gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
  171. gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
  172. gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
  173. gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
  174. gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
  175. gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
  176. gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
  177. gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
  178. gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
  179. gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
  180. gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
  181. gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
  182. gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
  183. gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
  184. gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
  185. gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
  186. gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
  187. gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
  188. gaia/eval/webapp/node_modules/content-type/README.md +94 -0
  189. gaia/eval/webapp/node_modules/content-type/index.js +225 -0
  190. gaia/eval/webapp/node_modules/content-type/package.json +42 -0
  191. gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
  192. gaia/eval/webapp/node_modules/cookie/README.md +317 -0
  193. gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
  194. gaia/eval/webapp/node_modules/cookie/index.js +334 -0
  195. gaia/eval/webapp/node_modules/cookie/package.json +44 -0
  196. gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
  197. gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
  198. gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
  199. gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
  200. gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
  201. gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
  202. gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
  203. gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
  204. gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
  205. gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
  206. gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
  207. gaia/eval/webapp/node_modules/debug/Makefile +50 -0
  208. gaia/eval/webapp/node_modules/debug/README.md +312 -0
  209. gaia/eval/webapp/node_modules/debug/component.json +19 -0
  210. gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
  211. gaia/eval/webapp/node_modules/debug/node.js +1 -0
  212. gaia/eval/webapp/node_modules/debug/package.json +49 -0
  213. gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
  214. gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
  215. gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
  216. gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
  217. gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
  218. gaia/eval/webapp/node_modules/depd/History.md +103 -0
  219. gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
  220. gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
  221. gaia/eval/webapp/node_modules/depd/index.js +538 -0
  222. gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
  223. gaia/eval/webapp/node_modules/depd/package.json +45 -0
  224. gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
  225. gaia/eval/webapp/node_modules/destroy/README.md +63 -0
  226. gaia/eval/webapp/node_modules/destroy/index.js +209 -0
  227. gaia/eval/webapp/node_modules/destroy/package.json +48 -0
  228. gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
  229. gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
  230. gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
  231. gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
  232. gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
  233. gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
  234. gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
  235. gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
  236. gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
  237. gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
  238. gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
  239. gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
  240. gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
  241. gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
  242. gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
  243. gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
  244. gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
  245. gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
  246. gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
  247. gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
  248. gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
  249. gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
  250. gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
  251. gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
  252. gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
  253. gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
  254. gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
  255. gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
  256. gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
  257. gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
  258. gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
  259. gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
  260. gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
  261. gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
  262. gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
  263. gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
  264. gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
  265. gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
  266. gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
  267. gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
  268. gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
  269. gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
  270. gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
  271. gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
  272. gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
  273. gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
  274. gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
  275. gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
  276. gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
  277. gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
  278. gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
  279. gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
  280. gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
  281. gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
  282. gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
  283. gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
  284. gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
  285. gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
  286. gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
  287. gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
  288. gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
  289. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
  290. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
  291. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
  292. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
  293. gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
  294. gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
  295. gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
  296. gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
  297. gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
  298. gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
  299. gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
  300. gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
  301. gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
  302. gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
  303. gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
  304. gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
  305. gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
  306. gaia/eval/webapp/node_modules/etag/README.md +159 -0
  307. gaia/eval/webapp/node_modules/etag/index.js +131 -0
  308. gaia/eval/webapp/node_modules/etag/package.json +47 -0
  309. gaia/eval/webapp/node_modules/express/History.md +3656 -0
  310. gaia/eval/webapp/node_modules/express/LICENSE +24 -0
  311. gaia/eval/webapp/node_modules/express/Readme.md +260 -0
  312. gaia/eval/webapp/node_modules/express/index.js +11 -0
  313. gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
  314. gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
  315. gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
  316. gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
  317. gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
  318. gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
  319. gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
  320. gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
  321. gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
  322. gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
  323. gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
  324. gaia/eval/webapp/node_modules/express/package.json +102 -0
  325. gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
  326. gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
  327. gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
  328. gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
  329. gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
  330. gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
  331. gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
  332. gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
  333. gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
  334. gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
  335. gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
  336. gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
  337. gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
  338. gaia/eval/webapp/node_modules/fresh/README.md +119 -0
  339. gaia/eval/webapp/node_modules/fresh/index.js +137 -0
  340. gaia/eval/webapp/node_modules/fresh/package.json +46 -0
  341. gaia/eval/webapp/node_modules/fs/README.md +9 -0
  342. gaia/eval/webapp/node_modules/fs/package.json +20 -0
  343. gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
  344. gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
  345. gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
  346. gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
  347. gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
  348. gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
  349. gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
  350. gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
  351. gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
  352. gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
  353. gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
  354. gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
  355. gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
  356. gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
  357. gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
  358. gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
  359. gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
  360. gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
  361. gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
  362. gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
  363. gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
  364. gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
  365. gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
  366. gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
  367. gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
  368. gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
  369. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
  370. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
  371. gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
  372. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
  373. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
  374. gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
  375. gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
  376. gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
  377. gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
  378. gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
  379. gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
  380. gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
  381. gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
  382. gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
  383. gaia/eval/webapp/node_modules/gopd/README.md +40 -0
  384. gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
  385. gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
  386. gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
  387. gaia/eval/webapp/node_modules/gopd/index.js +15 -0
  388. gaia/eval/webapp/node_modules/gopd/package.json +77 -0
  389. gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
  390. gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
  391. gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
  392. gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
  393. gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
  394. gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
  395. gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
  396. gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
  397. gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
  398. gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
  399. gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
  400. gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
  401. gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
  402. gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
  403. gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
  404. gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
  405. gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
  406. gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
  407. gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
  408. gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
  409. gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
  410. gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
  411. gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
  412. gaia/eval/webapp/node_modules/hasown/README.md +40 -0
  413. gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
  414. gaia/eval/webapp/node_modules/hasown/index.js +8 -0
  415. gaia/eval/webapp/node_modules/hasown/package.json +92 -0
  416. gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
  417. gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
  418. gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
  419. gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
  420. gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
  421. gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
  422. gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
  423. gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
  424. gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
  425. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
  426. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
  427. gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
  428. gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
  429. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
  430. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
  431. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
  432. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
  433. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
  434. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
  435. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
  436. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
  437. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
  438. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
  439. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
  440. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
  441. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
  442. gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
  443. gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
  444. gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
  445. gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
  446. gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
  447. gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
  448. gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
  449. gaia/eval/webapp/node_modules/inherits/README.md +42 -0
  450. gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
  451. gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
  452. gaia/eval/webapp/node_modules/inherits/package.json +29 -0
  453. gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
  454. gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
  455. gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
  456. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
  457. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
  458. gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
  459. gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
  460. gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
  461. gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
  462. gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
  463. gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
  464. gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
  465. gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
  466. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
  467. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
  468. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
  469. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
  470. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
  471. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
  472. gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
  473. gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
  474. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
  475. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
  476. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
  477. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
  478. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
  479. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
  480. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
  481. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
  482. gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
  483. gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
  484. gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
  485. gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
  486. gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
  487. gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
  488. gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
  489. gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
  490. gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
  491. gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
  492. gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
  493. gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
  494. gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
  495. gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
  496. gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
  497. gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
  498. gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
  499. gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
  500. gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
  501. gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
  502. gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
  503. gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
  504. gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
  505. gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
  506. gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
  507. gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
  508. gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
  509. gaia/eval/webapp/node_modules/methods/README.md +51 -0
  510. gaia/eval/webapp/node_modules/methods/index.js +69 -0
  511. gaia/eval/webapp/node_modules/methods/package.json +36 -0
  512. gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
  513. gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
  514. gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
  515. gaia/eval/webapp/node_modules/mime/README.md +90 -0
  516. gaia/eval/webapp/node_modules/mime/cli.js +8 -0
  517. gaia/eval/webapp/node_modules/mime/mime.js +108 -0
  518. gaia/eval/webapp/node_modules/mime/package.json +44 -0
  519. gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
  520. gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
  521. gaia/eval/webapp/node_modules/mime/types.json +1 -0
  522. gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
  523. gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
  524. gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
  525. gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
  526. gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
  527. gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
  528. gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
  529. gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
  530. gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
  531. gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
  532. gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
  533. gaia/eval/webapp/node_modules/ms/index.js +152 -0
  534. gaia/eval/webapp/node_modules/ms/license.md +21 -0
  535. gaia/eval/webapp/node_modules/ms/package.json +37 -0
  536. gaia/eval/webapp/node_modules/ms/readme.md +51 -0
  537. gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
  538. gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
  539. gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
  540. gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
  541. gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
  542. gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
  543. gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
  544. gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
  545. gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
  546. gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
  547. gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
  548. gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
  549. gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
  550. gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
  551. gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
  552. gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
  553. gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
  554. gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
  555. gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
  556. gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
  557. gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
  558. gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
  559. gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
  560. gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
  561. gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
  562. gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
  563. gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
  564. gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
  565. gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
  566. gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
  567. gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
  568. gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
  569. gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
  570. gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
  571. gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
  572. gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
  573. gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
  574. gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
  575. gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
  576. gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
  577. gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
  578. gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
  579. gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
  580. gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
  581. gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
  582. gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
  583. gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
  584. gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
  585. gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
  586. gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
  587. gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
  588. gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
  589. gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
  590. gaia/eval/webapp/node_modules/path/.npmignore +1 -0
  591. gaia/eval/webapp/node_modules/path/LICENSE +18 -0
  592. gaia/eval/webapp/node_modules/path/README.md +15 -0
  593. gaia/eval/webapp/node_modules/path/package.json +24 -0
  594. gaia/eval/webapp/node_modules/path/path.js +628 -0
  595. gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
  596. gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
  597. gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
  598. gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
  599. gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
  600. gaia/eval/webapp/node_modules/process/LICENSE +22 -0
  601. gaia/eval/webapp/node_modules/process/README.md +26 -0
  602. gaia/eval/webapp/node_modules/process/browser.js +184 -0
  603. gaia/eval/webapp/node_modules/process/index.js +2 -0
  604. gaia/eval/webapp/node_modules/process/package.json +27 -0
  605. gaia/eval/webapp/node_modules/process/test.js +199 -0
  606. gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
  607. gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
  608. gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
  609. gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
  610. gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
  611. gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
  612. gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
  613. gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
  614. gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
  615. gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
  616. gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
  617. gaia/eval/webapp/node_modules/qs/README.md +709 -0
  618. gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
  619. gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
  620. gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
  621. gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
  622. gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
  623. gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
  624. gaia/eval/webapp/node_modules/qs/package.json +91 -0
  625. gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
  626. gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
  627. gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
  628. gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
  629. gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
  630. gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
  631. gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
  632. gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
  633. gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
  634. gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
  635. gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
  636. gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
  637. gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
  638. gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
  639. gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
  640. gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
  641. gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
  642. gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
  643. gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
  644. gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
  645. gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
  646. gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
  647. gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
  648. gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
  649. gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
  650. gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
  651. gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
  652. gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
  653. gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
  654. gaia/eval/webapp/node_modules/send/LICENSE +23 -0
  655. gaia/eval/webapp/node_modules/send/README.md +327 -0
  656. gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
  657. gaia/eval/webapp/node_modules/send/index.js +1142 -0
  658. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
  659. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
  660. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
  661. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
  662. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
  663. gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
  664. gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
  665. gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
  666. gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
  667. gaia/eval/webapp/node_modules/send/package.json +62 -0
  668. gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
  669. gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
  670. gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
  671. gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
  672. gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
  673. gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
  674. gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
  675. gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
  676. gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
  677. gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
  678. gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
  679. gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
  680. gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
  681. gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
  682. gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
  683. gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
  684. gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
  685. gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
  686. gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
  687. gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
  688. gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
  689. gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
  690. gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
  691. gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
  692. gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
  693. gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
  694. gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
  695. gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
  696. gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
  697. gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
  698. gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
  699. gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
  700. gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
  701. gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
  702. gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
  703. gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
  704. gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
  705. gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
  706. gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
  707. gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
  708. gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
  709. gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
  710. gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
  711. gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
  712. gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
  713. gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
  714. gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
  715. gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
  716. gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
  717. gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
  718. gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
  719. gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
  720. gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
  721. gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
  722. gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
  723. gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
  724. gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
  725. gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
  726. gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
  727. gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
  728. gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
  729. gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
  730. gaia/eval/webapp/node_modules/statuses/README.md +136 -0
  731. gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
  732. gaia/eval/webapp/node_modules/statuses/index.js +146 -0
  733. gaia/eval/webapp/node_modules/statuses/package.json +49 -0
  734. gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
  735. gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
  736. gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
  737. gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
  738. gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
  739. gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
  740. gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
  741. gaia/eval/webapp/node_modules/type-is/README.md +170 -0
  742. gaia/eval/webapp/node_modules/type-is/index.js +266 -0
  743. gaia/eval/webapp/node_modules/type-is/package.json +45 -0
  744. gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
  745. gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
  746. gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
  747. gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
  748. gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
  749. gaia/eval/webapp/node_modules/util/LICENSE +18 -0
  750. gaia/eval/webapp/node_modules/util/README.md +15 -0
  751. gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
  752. gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
  753. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
  754. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
  755. gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
  756. gaia/eval/webapp/node_modules/util/package.json +35 -0
  757. gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
  758. gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
  759. gaia/eval/webapp/node_modules/util/util.js +586 -0
  760. gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
  761. gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
  762. gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
  763. gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
  764. gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
  765. gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
  766. gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
  767. gaia/eval/webapp/node_modules/vary/README.md +101 -0
  768. gaia/eval/webapp/node_modules/vary/index.js +149 -0
  769. gaia/eval/webapp/node_modules/vary/package.json +43 -0
  770. gaia/eval/webapp/package-lock.json +875 -0
  771. gaia/eval/webapp/package.json +21 -0
  772. gaia/eval/webapp/public/app.js +3403 -0
  773. gaia/eval/webapp/public/index.html +88 -0
  774. gaia/eval/webapp/public/styles.css +3661 -0
  775. gaia/eval/webapp/server.js +416 -0
  776. gaia/eval/webapp/test-setup.js +73 -0
  777. gaia/llm/__init__.py +2 -0
  778. gaia/llm/lemonade_client.py +3083 -0
  779. gaia/llm/lemonade_manager.py +269 -0
  780. gaia/llm/llm_client.py +729 -0
  781. gaia/llm/vlm_client.py +307 -0
  782. gaia/logger.py +189 -0
  783. gaia/mcp/agent_mcp_server.py +245 -0
  784. gaia/mcp/blender_mcp_client.py +138 -0
  785. gaia/mcp/blender_mcp_server.py +648 -0
  786. gaia/mcp/context7_cache.py +332 -0
  787. gaia/mcp/external_services.py +518 -0
  788. gaia/mcp/mcp_bridge.py +550 -0
  789. gaia/mcp/servers/__init__.py +6 -0
  790. gaia/mcp/servers/docker_mcp.py +83 -0
  791. gaia/rag/__init__.py +10 -0
  792. gaia/rag/app.py +293 -0
  793. gaia/rag/demo.py +304 -0
  794. gaia/rag/pdf_utils.py +235 -0
  795. gaia/rag/sdk.py +2194 -0
  796. gaia/security.py +163 -0
  797. gaia/talk/app.py +289 -0
  798. gaia/talk/sdk.py +538 -0
  799. gaia/util.py +46 -0
  800. gaia/version.py +100 -0
gaia/llm/lemonade_client.py
@@ -0,0 +1,3083 @@
1
+ #!/usr/bin/env python
2
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3
+ # SPDX-License-Identifier: MIT
4
+ """
5
+ Lemonade Server Client for GAIA.
6
+
7
+ This module provides a client for interacting with the Lemonade server's
8
+ OpenAI-compatible API and additional functionality.
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ import os
14
+ import re
15
+ import shutil
16
+ import signal
17
+ import socket
18
+ import subprocess
19
+ import sys
20
+ import threading
21
+ import time
22
+ from dataclasses import dataclass, field
23
+ from enum import Enum
24
+ from threading import Event, Thread
25
+ from typing import Any, Callable, Dict, Generator, List, Optional, Union
26
+
27
+ import openai # For exception types
28
+ import psutil
29
+ import requests
30
+ from dotenv import load_dotenv
31
+
32
+ # Import OpenAI client for internal use
33
+ from openai import OpenAI
34
+
35
+ from gaia.logger import get_logger
36
+
37
+ # Load environment variables from .env file
38
+ load_dotenv()
39
+
40
+ # =========================================================================
41
+ # Server Configuration Defaults
42
+ # =========================================================================
43
+ # Default server host and port (can be overridden via LEMONADE_BASE_URL env var)
44
+ DEFAULT_HOST = "localhost"
45
+ DEFAULT_PORT = 8000
46
+ # API version supported by this client
47
+ LEMONADE_API_VERSION = "v1"
48
+ # Default URL includes /api/v1 to match documentation and other clients
49
+ DEFAULT_LEMONADE_URL = (
50
+ f"http://{DEFAULT_HOST}:{DEFAULT_PORT}/api/{LEMONADE_API_VERSION}"
51
+ )
52
+
53
+
54
+ def _get_lemonade_config() -> tuple:
55
+ """
56
+ Get Lemonade host, port, and base_url from environment or defaults.
57
+
58
+ Parses LEMONADE_BASE_URL env var if set, otherwise uses defaults.
59
+ The base_url is expected to include /api/v1 suffix per documentation.
60
+
61
+ Returns:
62
+ Tuple of (host, port, base_url)
63
+ """
64
+ from urllib.parse import urlparse
65
+
66
+ base_url = os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
67
+ # Parse the URL to extract host and port for backwards compatibility
68
+ parsed = urlparse(base_url)
69
+ host = parsed.hostname or DEFAULT_HOST
70
+ port = (
71
+ 80
72
+ if (parsed.port is None and host is not None)
73
+ else (parsed.port or DEFAULT_PORT)
74
+ )
75
+ return (host, port, base_url)
76
+
77
+
78
+ # =========================================================================
79
+ # Model Configuration Defaults
80
+ # =========================================================================
81
+ # Default model for text generation - lightweight CPU model for testing
82
+ DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
83
+ # DEFAULT_MODEL_NAME = "Llama-3.2-3B-Instruct-Hybrid"
84
+
85
+ # =========================================================================
86
+ # Request Configuration Defaults
87
+ # =========================================================================
88
+ # Default timeout in seconds for regular API requests
89
+ # Increased to accommodate long-running coding and evaluation tasks
90
+ DEFAULT_REQUEST_TIMEOUT = 900
91
+ # Default timeout in seconds for model loading operations
92
+ # Increased for large model downloads and loading
93
+ DEFAULT_MODEL_LOAD_TIMEOUT = 1200
94
+
95
+
96
+ # =========================================================================
97
+ # Model Types and Agent Profiles
98
+ # =========================================================================
99
+
100
+
101
+ class ModelType(Enum):
102
+ """Types of models supported by Lemonade"""
103
+
104
+ LLM = "llm" # Large Language Model for chat/reasoning
105
+ EMBEDDING = "embed" # Embedding model for RAG
106
+ VLM = "vlm" # Vision-Language Model for image understanding
107
+ ASR = "asr" # Automatic Speech Recognition
108
+ TTS = "tts" # Text-to-Speech
109
+
110
+
111
+ @dataclass
112
+ class ModelRequirement:
113
+ """Defines a model requirement for an agent"""
114
+
115
+ model_type: ModelType
116
+ model_id: str
117
+ display_name: str
118
+ required: bool = True
119
+ min_ctx_size: int = 4096 # Minimum context size needed
120
+
121
+
122
+ @dataclass
123
+ class AgentProfile:
124
+ """Defines the requirements for an agent"""
125
+
126
+ name: str
127
+ display_name: str
128
+ models: list = field(default_factory=list)
129
+ min_ctx_size: int = 4096
130
+ description: str = ""
131
+
132
+
133
+ @dataclass
134
+ class LemonadeStatus:
135
+ """Status of Lemonade Server"""
136
+
137
+ running: bool = False
138
+ url: str = field(
139
+ default_factory=lambda: os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
140
+ )
141
+ context_size: int = 0
142
+ loaded_models: list = field(default_factory=list)
143
+ health_data: dict = field(default_factory=dict)
144
+ error: Optional[str] = None
145
+
146
+
147
+ # Define available models
148
+ MODELS = {
149
+ # LLM Models
150
+ "qwen3-coder-30b": ModelRequirement(
151
+ model_type=ModelType.LLM,
152
+ model_id="Qwen3-Coder-30B-A3B-Instruct-GGUF",
153
+ display_name="Qwen3 Coder 30B",
154
+ min_ctx_size=32768,
155
+ ),
156
+ "qwen2.5-0.5b": ModelRequirement(
157
+ model_type=ModelType.LLM,
158
+ model_id="Qwen2.5-0.5B-Instruct-CPU",
159
+ display_name="Qwen2.5 0.5B (Fast)",
160
+ min_ctx_size=4096,
161
+ ),
162
+ # Embedding Models
163
+ "nomic-embed": ModelRequirement(
164
+ model_type=ModelType.EMBEDDING,
165
+ model_id="nomic-embed-text-v2-moe-GGUF",
166
+ display_name="Nomic Embed Text v2",
167
+ min_ctx_size=2048,
168
+ ),
169
+ # VLM Models
170
+ "qwen2.5-vl-7b": ModelRequirement(
171
+ model_type=ModelType.VLM,
172
+ model_id="Qwen2.5-VL-7B-Instruct-GGUF",
173
+ display_name="Qwen2.5 VL 7B",
174
+ min_ctx_size=8192,
175
+ ),
176
+ }
177
+
178
+ # Define agent profiles with their model requirements
179
+ AGENT_PROFILES = {
180
+ "chat": AgentProfile(
181
+ name="chat",
182
+ display_name="Chat Agent",
183
+ models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
184
+ min_ctx_size=32768,
185
+ description="Interactive chat with RAG and vision support",
186
+ ),
187
+ "code": AgentProfile(
188
+ name="code",
189
+ display_name="Code Agent",
190
+ models=["qwen3-coder-30b"],
191
+ min_ctx_size=32768,
192
+ description="Autonomous coding assistant",
193
+ ),
194
+ "talk": AgentProfile(
195
+ name="talk",
196
+ display_name="Talk Agent",
197
+ models=["qwen3-coder-30b"],
198
+ min_ctx_size=32768,
199
+ description="Voice-enabled chat",
200
+ ),
201
+ "rag": AgentProfile(
202
+ name="rag",
203
+ display_name="RAG System",
204
+ models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
205
+ min_ctx_size=32768,
206
+ description="Document Q&A with retrieval and vision",
207
+ ),
208
+ "blender": AgentProfile(
209
+ name="blender",
210
+ display_name="Blender Agent",
211
+ models=["qwen3-coder-30b"],
212
+ min_ctx_size=32768,
213
+ description="3D content generation in Blender",
214
+ ),
215
+ "jira": AgentProfile(
216
+ name="jira",
217
+ display_name="Jira Agent",
218
+ models=["qwen3-coder-30b"],
219
+ min_ctx_size=32768,
220
+ description="Jira issue management",
221
+ ),
222
+ "docker": AgentProfile(
223
+ name="docker",
224
+ display_name="Docker Agent",
225
+ models=["qwen3-coder-30b"],
226
+ min_ctx_size=32768,
227
+ description="Docker container management",
228
+ ),
229
+ "vlm": AgentProfile(
230
+ name="vlm",
231
+ display_name="Vision Agent",
232
+ models=["qwen2.5-vl-7b"],
233
+ min_ctx_size=8192,
234
+ description="Image understanding and analysis",
235
+ ),
236
+ "minimal": AgentProfile(
237
+ name="minimal",
238
+ display_name="Minimal (Fast)",
239
+ models=["qwen2.5-0.5b"],
240
+ min_ctx_size=4096,
241
+ description="Fast responses with smaller model",
242
+ ),
243
+ "mcp": AgentProfile(
244
+ name="mcp",
245
+ display_name="MCP Bridge",
246
+ models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
247
+ min_ctx_size=32768,
248
+ description="Model Context Protocol bridge server with vision",
249
+ ),
250
+ }
251
+
252
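+ # Example lookup (editor's sketch): resolve a profile's model requirements
+ # from the two registries above.
+ #
+ #     profile = AGENT_PROFILES["code"]
+ #     for key in profile.models:
+ #         req = MODELS[key]
+ #         print(req.display_name, req.model_id, req.min_ctx_size)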
+
253
+ class LemonadeClientError(Exception):
254
+ """Base exception for Lemonade client errors."""
255
+
256
+
257
+ class ModelDownloadCancelledError(LemonadeClientError):
258
+ """Raised when a model download is cancelled by user."""
259
+
260
+
261
+ class InsufficientDiskSpaceError(LemonadeClientError):
262
+ """Raised when there's not enough disk space for model download."""
263
+
264
+
265
+ @dataclass
266
+ class DownloadTask:
267
+ """Represents an ongoing model download."""
268
+
269
+ model_name: str
270
+ size_gb: float = 0.0
271
+ start_time: float = field(default_factory=time.time)
272
+ cancel_event: Event = field(default_factory=Event)
273
+ progress_percent: float = 0.0
274
+
275
+ def cancel(self):
276
+ """Cancel this download."""
277
+ self.cancel_event.set()
278
+
279
+ def is_cancelled(self) -> bool:
280
+ """Check if download was cancelled."""
281
+ return self.cancel_event.is_set()
282
+
283
+ def elapsed_time(self) -> float:
284
+ """Get elapsed time in seconds."""
285
+ return time.time() - self.start_time
286
+
287
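+ # Minimal usage sketch (editor's note): a DownloadTask is shared between the
+ # thread waiting on a download and the thread that may cancel the wait.
+ #
+ #     task = DownloadTask(model_name="some-model", size_gb=5.0)
+ #     task.cancel()              # called from another thread
+ #     assert task.is_cancelled()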
+
288
+ def _supports_unicode() -> bool:
289
+ """
290
+ Check if the terminal supports Unicode output.
291
+
292
+ Returns:
293
+ True if UTF-8 encoding is supported, False otherwise
294
+ """
295
+ try:
296
+ # Check stdout encoding
297
+ encoding = sys.stdout.encoding
298
+ if encoding and "utf" in encoding.lower():
299
+ return True
300
+ # Try encoding a test emoji
301
+ "✓".encode(encoding or "utf-8")
302
+ return True
303
+ except (UnicodeEncodeError, AttributeError, LookupError):
304
+ return False
305
+
306
+
307
+ # Cache unicode support check
308
+ _UNICODE_SUPPORTED = _supports_unicode()
309
+
310
+
311
+ def _emoji(unicode_char: str, ascii_fallback: str) -> str:
312
+ """
313
+ Return emoji if terminal supports unicode, otherwise ASCII fallback.
314
+
315
+ Args:
316
+ unicode_char: Unicode emoji character
317
+ ascii_fallback: ASCII fallback string
318
+
319
+ Returns:
320
+ Unicode emoji or ASCII fallback
321
+
322
+ Examples:
323
+ _emoji("✅", "[OK]") # Returns "✅" or "[OK]"
324
+ _emoji("❌", "[X]") # Returns "❌" or "[X]"
325
+ _emoji("📥", "[DL]") # Returns "📥" or "[DL]"
326
+ """
327
+ return unicode_char if _UNICODE_SUPPORTED else ascii_fallback
328
+
329
+
330
+ def kill_process_on_port(port):
331
+ """Kill any process that is using the specified port."""
332
+ for proc in psutil.process_iter(["pid", "name"]):
333
+ try:
334
+ connections = proc.net_connections()
335
+ for conn in connections:
336
+ if conn.laddr.port == port:
337
+ proc_name = proc.name()
338
+ proc_pid = proc.pid
339
+ proc.kill()
340
+ print(
341
+ f"Killed process {proc_name} (PID: {proc_pid}) using port {port}"
342
+ )
343
+ except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
344
+ continue
345
+
346
+
347
+ def _prompt_user_for_download(
348
+ model_name: str, size_gb: float, estimated_minutes: int
349
+ ) -> bool:
350
+ """
351
+ Prompt user for confirmation before downloading a large model.
352
+
353
+ Args:
354
+ model_name: Name of the model to download
355
+ size_gb: Size in gigabytes
356
+ estimated_minutes: Estimated download time in minutes
357
+
358
+ Returns:
359
+ True if user confirms, False otherwise
360
+ """
361
+ # Check if we're in an interactive terminal
362
+ if not sys.stdin.isatty() or not sys.stdout.isatty():
363
+ # Non-interactive environment - auto-approve
364
+ return True
365
+
366
+ print("\n" + "=" * 60)
367
+ print(f"{_emoji('📥', '[DOWNLOAD]')} Model Download Required")
368
+ print("=" * 60)
369
+ print(f"Model: {model_name}")
370
+ print(f"Size: {size_gb:.1f} GB")
371
+ print(f"Estimated time: ~{estimated_minutes} minutes (@ 100Mbps)")
372
+ print("=" * 60)
373
+
374
+ while True:
375
+ response = input("Download this model? [Y/n]: ").strip().lower()
376
+ if response in ("", "y", "yes"):
377
+ return True
378
+ elif response in ("n", "no"):
379
+ return False
380
+ else:
381
+ print("Please enter 'y' or 'n'")
382
+
383
+
384
+ def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
385
+ """
386
+ Check if there's enough disk space for download.
387
+
388
+ Args:
389
+ size_gb: Required space in GB
390
+ path: Path to check. If None (default), checks current working directory.
391
+ This is cross-platform compatible (works on Windows and Unix).
392
+
393
+ Returns:
394
+ True if enough space available
395
+
396
+ Raises:
397
+ InsufficientDiskSpaceError: If not enough space
398
+
399
+ Note:
400
+ The default checks the current working directory's drive/partition.
401
+ Ideally, this should check the actual model storage location, but that
402
+ requires server API support to report the storage path.
403
+ """
404
+ try:
405
+ # Use current working directory if no path specified (cross-platform)
406
+ check_path = path if path is not None else os.getcwd()
407
+ stat = shutil.disk_usage(check_path)
408
+ free_gb = stat.free / (1024**3)
409
+ required_gb = size_gb * 1.5 # Need 50% buffer for extraction/temp files
410
+
411
+ if free_gb < required_gb:
412
+ raise InsufficientDiskSpaceError(
413
+ f"Insufficient disk space: need {required_gb:.1f}GB, "
414
+ f"have {free_gb:.1f}GB free"
415
+ )
416
+ return True
417
+ except InsufficientDiskSpaceError:
418
+ raise
419
+ except Exception as e:
420
+ # If we can't check disk space, log warning but continue
421
+ logger = logging.getLogger(__name__)
422
+ logger.warning(f"Could not check disk space: {e}")
423
+ return True
424
+
425
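+ # Worked example (editor's note): _check_disk_space(20.0) requires
+ # 20 * 1.5 = 30 GB free on the current working directory's partition and
+ # raises InsufficientDiskSpaceError otherwise.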
+
426
+ class LemonadeClient:
427
+ """Client for interacting with the Lemonade server REST API."""
428
+
429
+ def __init__(
430
+ self,
431
+ model: Optional[str] = None,
432
+ host: Optional[str] = None,
433
+ port: Optional[int] = None,
434
+ verbose: bool = True,
435
+ keep_alive: bool = False,
436
+ ):
437
+ """
438
+ Initialize the Lemonade client.
439
+
440
+ Args:
441
+ model: Name of the model to load (optional)
442
+ host: Host address of the Lemonade server (defaults to LEMONADE_BASE_URL env var)
443
+ port: Port number of the Lemonade server (defaults to LEMONADE_BASE_URL env var)
444
+ verbose: If False, reduce logging verbosity during initialization
445
+ keep_alive: If True, don't terminate server in __del__
446
+ """
447
+ # Use provided host/port, or get from env var, or use defaults
448
+ env_host, env_port, env_base_url = _get_lemonade_config()
449
+ self.host = host if host is not None else env_host
450
+ self.port = port if port is not None else env_port
451
+ # If host/port explicitly provided, construct URL; otherwise use env URL directly
452
+ if host is not None or port is not None:
453
+ self.base_url = f"http://{self.host}:{self.port}/api/{LEMONADE_API_VERSION}"
454
+ else:
455
+ self.base_url = env_base_url
456
+ self.model = model
457
+ self.server_process = None
458
+ self.log = get_logger(__name__)
459
+ self.keep_alive = keep_alive
460
+
461
+ # Track active downloads for cancellation support
462
+ self.active_downloads: Dict[str, DownloadTask] = {}
463
+ self._downloads_lock = threading.Lock()
464
+
465
+ # Set logging level based on verbosity
466
+ if not verbose:
467
+ self.log.setLevel(logging.WARNING)
468
+
469
+         self.log.info(f"Initialized Lemonade client for {self.host}:{self.port}")
470
+ if model:
471
+ self.log.info(f"Initial model set to: {model}")
472
+
473
+ def launch_server(self, log_level="info", background="none", ctx_size=None):
474
+ """
475
+ Launch the Lemonade server using subprocess.
476
+
477
+ Args:
478
+ log_level: Logging level for the server
479
+ ('critical', 'error', 'warning', 'info', 'debug', 'trace').
480
+ Defaults to 'info'.
481
+ background: How to run the server:
482
+ - "terminal": Launch in a new terminal window
483
+ - "silent": Run in background with output to log file
484
+ - "none": Run in foreground (default)
485
+ ctx_size: Context size for the model (default: None, uses server default).
486
+ For chat/RAG applications, use 32768 or higher.
487
+
488
+ This method follows the approach in test_lemonade_server.py.
489
+ """
490
+ self.log.info("Starting Lemonade server...")
491
+
492
+ # Ensure we kill anything using the port
493
+ kill_process_on_port(self.port)
494
+
495
+ # Build the base command
496
+ base_cmd = ["lemonade-server", "serve"]
497
+ if log_level != "info":
498
+ base_cmd.extend(["--log-level", log_level])
499
+ if ctx_size is not None:
500
+ base_cmd.extend(["--ctx-size", str(ctx_size)])
501
+ self.log.info(f"Context size set to: {ctx_size}")
502
+
503
+ if background == "terminal":
504
+             # Launch in a new terminal window (Windows-only: uses `start cmd`)
505
+ cmd = f'start cmd /k "{" ".join(base_cmd)}"'
506
+ self.server_process = subprocess.Popen(cmd, shell=True)
507
+ elif background == "silent":
508
+ # Run in background with subprocess
509
+ log_file = open("lemonade.log", "w", encoding="utf-8")
510
+ self.server_process = subprocess.Popen(
511
+ base_cmd,
512
+ stdout=log_file,
513
+ stderr=log_file,
514
+ text=True,
515
+ bufsize=1,
516
+                 # A list argv must not be combined with shell=True (it
+                 # mis-handles arguments on POSIX); start a new session on
+                 # POSIX so terminate_server() can signal the process group
+                 # without also signalling this process.
+                 start_new_session=not sys.platform.startswith("win"),
+ )
518
+ else: # "none" or any other value
519
+ # Run in foreground with real-time output
520
+ self.server_process = subprocess.Popen(
521
+ base_cmd,
522
+ stdout=subprocess.PIPE,
523
+ stderr=subprocess.PIPE,
524
+ text=True,
525
+ bufsize=1,
526
+                 start_new_session=not sys.platform.startswith("win"),
+ )
528
+
529
+         # Mirror stdout/stderr in real time (only useful in foreground mode;
+         # in the background modes the pipes are None and the reader thread
+         # exits immediately via the AttributeError guard below)
530
+ def print_output():
531
+ while True:
532
+ if self.server_process is None:
533
+ break
534
+ try:
535
+ stdout = self.server_process.stdout.readline()
536
+ stderr = self.server_process.stderr.readline()
537
+ if stdout:
538
+ self.log.debug(f"[Server stdout] {stdout.strip()}")
539
+ if stderr:
540
+ self.log.warning(f"[Server stderr] {stderr.strip()}")
541
+ if (
542
+ not stdout
543
+ and not stderr
544
+ and self.server_process is not None
545
+ and self.server_process.poll() is not None
546
+ ):
547
+ break
548
+ except AttributeError:
549
+ # This happens if server_process becomes None
550
+ # while we're executing this function
551
+ break
552
+
553
+ output_thread = Thread(target=print_output, daemon=True)
554
+ output_thread.start()
555
+
556
+ # Wait for the server to start by checking port
557
+ start_time = time.time()
558
+ while True:
559
+ if time.time() - start_time > 60:
560
+ self.log.error("Server failed to start within 60 seconds")
561
+ raise TimeoutError("Server failed to start within 60 seconds")
562
+ try:
563
+ conn = socket.create_connection((self.host, self.port))
564
+ conn.close()
565
+ break
566
+ except socket.error:
567
+ time.sleep(1)
568
+
569
+         # Wait a few more seconds after the port becomes available
570
+ time.sleep(5)
571
+ self.log.info("Lemonade server started successfully")
572
+
573
+ def terminate_server(self):
574
+ """Terminate the Lemonade server process if it exists."""
575
+ if not self.server_process:
576
+ return
577
+
578
+ try:
579
+ self.log.info("Terminating Lemonade server...")
580
+
581
+ # Handle different process types
582
+ if hasattr(self.server_process, "join"):
583
+ # Handle multiprocessing.Process objects
584
+ self.server_process.terminate()
585
+ self.server_process.join(timeout=5)
586
+ else:
587
+ # For subprocess.Popen
588
+ if sys.platform.startswith("win") and self.server_process.pid:
589
+ # On Windows, use taskkill to ensure process tree is terminated
590
+ os.system(f"taskkill /F /PID {self.server_process.pid} /T")
591
+ elif self.server_process.pid:
592
+ # On Linux/Unix, kill the process group to terminate child processes
593
+ try:
594
+ os.killpg(os.getpgid(self.server_process.pid), signal.SIGTERM)
595
+ # Wait a bit for graceful termination
596
+ try:
597
+ self.server_process.wait(timeout=2)
598
+ except subprocess.TimeoutExpired:
599
+ # Force kill if graceful termination failed
600
+ os.killpg(
601
+ os.getpgid(self.server_process.pid), signal.SIGKILL
602
+ )
603
+ except (OSError, ProcessLookupError):
604
+ # Process or process group doesn't exist, try individual kill
605
+ try:
606
+ self.server_process.kill()
607
+ except ProcessLookupError:
608
+ pass # Process already terminated
609
+ else:
610
+ # Fallback: try to kill normally
611
+ self.server_process.kill()
612
+ # Wait for process to terminate
613
+ try:
614
+ self.server_process.wait(timeout=5)
615
+ except subprocess.TimeoutExpired:
616
+ self.log.warning("Process did not terminate within timeout")
617
+
618
+ # Ensure port is free
619
+ kill_process_on_port(self.port)
620
+
621
+ # Reset reference
622
+ self.server_process = None
623
+ self.log.info("Lemonade server terminated successfully")
624
+ except Exception as e:
625
+ self.log.error(f"Error terminating server process: {e}")
626
+ # Reset reference even on error
627
+ self.server_process = None
628
+
629
+ def __del__(self):
630
+ """Cleanup server process on deletion."""
631
+ # Check if keep_alive attribute exists (might not if __init__ failed early)
632
+ if hasattr(self, "keep_alive") and not self.keep_alive:
633
+ self.terminate_server()
634
+ elif hasattr(self, "server_process") and self.server_process:
635
+ if hasattr(self, "log"):
636
+ self.log.info("Not terminating server because keep_alive=True")
637
+
638
+ def get_model_info(self, model_name: str) -> Dict[str, Any]:
639
+ """
640
+ Get information about a model from the server.
641
+
642
+ Args:
643
+ model_name: Name of the model
644
+
645
+ Returns:
646
+ Dict with model info including size_gb estimate
647
+ """
648
+ try:
649
+ models_response = self.list_models()
650
+ for model in models_response.get("data", []):
651
+ if model.get("id", "").lower() == model_name.lower():
652
+ # Estimate size based on model name if not provided
653
+ size_gb = model.get(
654
+ "size_gb", self._estimate_model_size(model_name)
655
+ )
656
+ return {
657
+ "id": model.get("id"),
658
+ "size_gb": size_gb,
659
+ "downloaded": model.get("downloaded", False),
660
+ }
661
+
662
+ # Model not found in list, provide estimate
663
+ return {
664
+ "id": model_name,
665
+ "size_gb": self._estimate_model_size(model_name),
666
+ "downloaded": False,
667
+ }
668
+ except Exception:
669
+ # If we can't get info, provide conservative estimate
670
+ return {
671
+ "id": model_name,
672
+ "size_gb": self._estimate_model_size(model_name),
673
+ "downloaded": False,
674
+ }
675
+
676
+ def _estimate_model_size(self, model_name: str) -> float:
677
+ """
678
+ Estimate model size in GB based on model name.
679
+
680
+ Args:
681
+ model_name: Name of the model
682
+
683
+ Returns:
684
+ Estimated size in GB
685
+ """
686
+ model_lower = model_name.lower()
687
+
688
+ # Look for billion parameter indicators
689
+ if "70b" in model_lower or "72b" in model_lower:
690
+ return 40.0 # ~40GB for 70B models
691
+ elif "30b" in model_lower or "34b" in model_lower:
692
+ return 20.0 # ~20GB for 30B models
693
+ elif "13b" in model_lower or "14b" in model_lower:
694
+ return 8.0 # ~8GB for 13B models
695
+ elif "7b" in model_lower:
696
+ return 5.0 # ~5GB for 7B models
697
+ elif "3b" in model_lower:
698
+ return 2.0 # ~2GB for 3B models
699
+ elif "1b" in model_lower or "0.5b" in model_lower:
700
+ return 1.0 # ~1GB for small models
701
+ elif "embed" in model_lower:
702
+ return 0.5 # Embedding models are usually small
703
+ else:
704
+ return 10.0 # Conservative default
705
+
706
+ def _estimate_download_time(self, size_gb: float, mbps: int = 100) -> int:
707
+ """
708
+ Estimate download time in minutes.
709
+
710
+ Args:
711
+ size_gb: Size in gigabytes
712
+ mbps: Connection speed in megabits per second
713
+
714
+ Returns:
715
+ Estimated time in minutes
716
+ """
717
+ # Convert GB to megabits: 1 GB = 8000 megabits
718
+ megabits = size_gb * 8000
719
+ # Time in seconds
720
+ seconds = megabits / mbps
721
+ # Convert to minutes and round up
722
+ return int(seconds / 60) + 1
723
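+
+     # Worked example (editor's note): for size_gb=20 at the default 100 Mbps,
+     # megabits = 20 * 8000 = 160,000, seconds = 160,000 / 100 = 1,600,
+     # so the estimate is int(1600 / 60) + 1 = 27 minutes.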
+
724
+ def cancel_download(self, model_name: str) -> bool:
725
+ """
726
+ Stop waiting for an ongoing model download.
727
+
728
+ **IMPORTANT:** This only stops the client from waiting for the download.
729
+ The server will continue downloading the model in the background.
730
+ This limitation exists because the server's `/api/v1/pull` endpoint does not
731
+ support cancellation.
732
+
733
+ To truly cancel a download, you would need to:
734
+ 1. Stop the Lemonade server process, or
735
+ 2. Wait for server API to support download cancellation
736
+
737
+ Args:
738
+ model_name: Name of the model being downloaded
739
+
740
+ Returns:
741
+ True if waiting was stopped, False if download not found
742
+
743
+ Example:
744
+ # User initiates download
745
+ client.load_model("large-model", auto_download=True)
746
+
747
+ # In another thread, user wants to "cancel"
748
+ client.cancel_download("large-model")
749
+ # Client stops waiting, but server keeps downloading
750
+
751
+ See Also:
752
+ - get_active_downloads(): List downloads client is waiting for
753
+ - Future: Server will support DELETE /api/v1/downloads/{id}
754
+ """
755
+ with self._downloads_lock:
756
+ if model_name in self.active_downloads:
757
+ task = self.active_downloads[model_name]
758
+ task.cancel()
759
+ self.log.warning(
760
+ f"Stopped waiting for {model_name} download. "
761
+ f"Note: Server continues downloading in background."
762
+ )
763
+ return True
764
+ return False
765
+
766
+ def get_active_downloads(self) -> List[DownloadTask]:
767
+ """Get list of active download tasks."""
768
+ with self._downloads_lock:
769
+ return list(self.active_downloads.values())
770
+
771
+ def _extract_error_info(self, error: Union[str, Dict, Exception]) -> Dict[str, Any]:
772
+ """
773
+ Extract structured error information from various error formats.
774
+
775
+ Lemonade server returns errors in two formats:
776
+ 1. Structured: {"error": {"message": "...", "type": "not_found"}}
777
+ 2. Operation: {"status": "error", "message": "..."}
778
+
779
+ Args:
780
+ error: Error as string, dict, or exception
781
+
782
+ Returns:
783
+ Dict with normalized error info:
784
+ - message: Error message text
785
+ - type: Error type if available (e.g., "not_found")
786
+ - code: Error code if available
787
+ - is_structured: Whether error had type/code field
788
+
789
+ Examples:
790
+ # From exception
791
+ info = self._extract_error_info(LemonadeClientError("Model not found"))
792
+ # Returns: {"message": "Model not found", "type": None, ...}
793
+
794
+ # From structured response
795
+ response = {"error": {"message": "Not found", "type": "not_found"}}
796
+ info = self._extract_error_info(response)
797
+ # Returns: {"message": "Not found", "type": "not_found", ...}
798
+ """
799
+ result = {
800
+ "message": "",
801
+ "type": None,
802
+ "code": None,
803
+ "is_structured": False,
804
+ }
805
+
806
+ # Handle exception objects
807
+ if isinstance(error, Exception):
808
+ error = str(error)
809
+
810
+ # Handle string errors
811
+ if isinstance(error, str):
812
+ result["message"] = error
813
+ return result
814
+
815
+ # Handle dict responses
816
+ if isinstance(error, dict):
817
+ # Format 1: {"error": {"message": "...", "type": "..."}}
818
+ if "error" in error and isinstance(error["error"], dict):
819
+ error_obj = error["error"]
820
+ result["message"] = error_obj.get("message", "")
821
+ result["type"] = error_obj.get("type")
822
+ result["code"] = error_obj.get("code")
823
+ result["is_structured"] = (
824
+ result["type"] is not None or result["code"] is not None
825
+ )
826
+
827
+ # Format 2: {"status": "error", "message": "..."}
828
+ elif error.get("status") == "error":
829
+ result["message"] = error.get("message", "")
830
+
831
+ # Fallback: use the dict as string
832
+ else:
833
+ result["message"] = str(error)
834
+
835
+ return result
836
+
837
+ def _is_model_error(self, error: Union[str, Dict, Exception]) -> bool:
838
+ """
839
+ Check if an error is related to model not being loaded.
840
+
841
+ Uses structured error types when available (e.g., type="not_found"),
842
+ falls back to string matching for unstructured errors.
843
+
844
+ Args:
845
+ error: Error as string, dict, or exception
846
+
847
+ Returns:
848
+ True if this is a model loading error
849
+
850
+ Examples:
851
+ # Structured error (preferred)
852
+ error = {"error": {"message": "...", "type": "not_found"}}
853
+ is_model_error = self._is_model_error(error) # Returns True
854
+
855
+ # String error (fallback)
856
+ is_model_error = self._is_model_error("model not loaded") # Returns True
857
+ """
858
+ # Extract structured error info
859
+ error_info = self._extract_error_info(error)
860
+
861
+ # Check structured error type first (more reliable)
862
+ error_type = error_info.get("type")
863
+ if error_type:
864
+ error_type_lower = error_type.lower()
865
+ if error_type_lower in ["not_found", "model_not_found", "model_not_loaded"]:
866
+ return True
867
+
868
+ # Fallback to string matching for unstructured errors
869
+ error_message = error_info.get("message") or ""
870
+ error_message = error_message.lower()
871
+ return any(
872
+ phrase in error_message
873
+ for phrase in [
874
+ "model not loaded",
875
+ "no model loaded",
876
+ "model not found",
877
+ "model is not loaded",
878
+ "model does not exist",
879
+ "model not available",
880
+ ]
881
+ )
882
+
883
+ def _execute_with_auto_download(
884
+ self, api_call: Callable, model: str, auto_download: bool = True
885
+ ):
886
+ """
887
+ Execute an API call with auto-download retry logic.
888
+
889
+ Args:
890
+ api_call: Function to call (should raise exception if model not loaded)
891
+ model: Model name
892
+ auto_download: Whether to auto-download on model error
893
+
894
+ Returns:
895
+ Result of api_call()
896
+
897
+ Raises:
898
+ ModelDownloadCancelledError: If user cancels download
899
+ InsufficientDiskSpaceError: If not enough disk space
900
+ LemonadeClientError: If download/load fails
901
+ """
902
+ try:
903
+ return api_call()
904
+ except Exception as e:
905
+ # Check if this is a model loading error and auto_download is enabled
906
+ if auto_download and self._is_model_error(e):
907
+ self.log.info(
908
+ f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model}' not loaded, "
909
+ f"attempting auto-download and load..."
910
+ )
911
+
912
+ # Load model with auto-download (includes prompt, validation, etc.)
913
+ self.load_model(model, timeout=60, auto_download=True)
914
+
915
+ # Retry the API call
916
+ self.log.info(
917
+ f"{_emoji('🔄', '[RETRY]')} Retrying API call with model: {model}"
918
+ )
919
+ return api_call()
920
+
921
+ # Re-raise original error
922
+ raise
923
+
924
+ def chat_completions(
925
+ self,
926
+ model: str,
927
+ messages: List[Dict[str, str]],
928
+ temperature: float = 0.7,
929
+ max_completion_tokens: Optional[int] = None,
930
+ max_tokens: Optional[int] = None,
931
+ stop: Optional[Union[str, List[str]]] = None,
932
+ stream: bool = False,
933
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
934
+ logprobs: Optional[bool] = None,
935
+ tools: Optional[List[Dict[str, Any]]] = None,
936
+ auto_download: bool = True,
937
+ **kwargs,
938
+ ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
939
+ """
940
+ Call the chat completions endpoint.
941
+
942
+ If the model is not loaded, it will be automatically downloaded and loaded.
943
+
944
+ Args:
945
+ model: The model to use for completion
946
+ messages: List of conversation messages with 'role' and 'content'
947
+ temperature: Controls randomness (higher = more random)
948
+ max_completion_tokens: Maximum number of output tokens to generate (preferred)
949
+ max_tokens: Maximum number of output tokens to generate
950
+ (deprecated, use max_completion_tokens)
951
+ stop: Sequences where generation should stop
952
+ stream: Whether to stream the response
953
+ timeout: Request timeout in seconds
954
+ logprobs: Whether to include log probabilities
955
+ tools: List of tools the model may call
956
+ auto_download: Automatically download model if not available (default: True)
957
+ **kwargs: Additional parameters to pass to the API
958
+
959
+ Returns:
960
+ For non-streaming: Dict with completion data
961
+ For streaming: Generator yielding completion chunks
962
+
963
+ Example response (non-streaming):
964
+ {
965
+ "id": "0",
966
+ "object": "chat.completion",
967
+ "created": 1742927481,
968
+ "model": "model-name",
969
+ "choices": [{
970
+ "index": 0,
971
+ "message": {
972
+ "role": "assistant",
973
+ "content": "Response text here"
974
+ },
975
+ "finish_reason": "stop"
976
+ }]
977
+ }
978
+ """
979
+ # Handle max_tokens vs max_completion_tokens
980
+ if max_completion_tokens is None and max_tokens is None:
981
+ max_completion_tokens = 1000 # Default value
982
+ elif max_completion_tokens is not None and max_tokens is not None:
983
+ self.log.warning(
984
+ "Both max_completion_tokens and max_tokens provided. Using max_completion_tokens."
985
+ )
986
+ elif max_tokens is not None:
987
+ max_completion_tokens = max_tokens
988
+
989
+ # Use the OpenAI client for streaming if requested
990
+ if stream:
991
+ return self._stream_chat_completions_with_openai(
992
+ model=model,
993
+ messages=messages,
994
+ temperature=temperature,
995
+ max_completion_tokens=max_completion_tokens,
996
+ stop=stop,
997
+ timeout=timeout,
998
+ logprobs=logprobs,
999
+ tools=tools,
1000
+ auto_download=auto_download,
1001
+ **kwargs,
1002
+ )
1003
+
1004
+ # Note: self.base_url already includes /api/v1
1005
+ url = f"{self.base_url}/chat/completions"
1006
+ data = {
1007
+ "model": model,
1008
+ "messages": messages,
1009
+ "temperature": temperature,
1010
+ "max_completion_tokens": max_completion_tokens,
1011
+ "stream": stream,
1012
+ **kwargs,
1013
+ }
1014
+
1015
+ if stop:
1016
+ data["stop"] = stop
1017
+
1018
+ if logprobs:
1019
+ data["logprobs"] = logprobs
1020
+
1021
+ if tools:
1022
+ data["tools"] = tools
1023
+
1024
+ # Helper function for the actual API call
1025
+ def _make_request():
1026
+ self.log.debug(f"Sending chat completion request to model: {model}")
1027
+ response = requests.post(
1028
+ url,
1029
+ json=data,
1030
+ headers={"Content-Type": "application/json"},
1031
+ timeout=timeout,
1032
+ )
1033
+
1034
+ if response.status_code != 200:
1035
+ error_msg = (
1036
+ f"Error in chat completions "
1037
+ f"(status {response.status_code}): {response.text}"
1038
+ )
1039
+ self.log.error(error_msg)
1040
+ raise LemonadeClientError(error_msg)
1041
+
1042
+ result = response.json()
1043
+ if "choices" in result and len(result["choices"]) > 0:
1044
+                 char_count = len(
+                     result["choices"][0].get("message", {}).get("content", "")
+                 )
+                 self.log.debug(
+                     f"Chat completion successful. "
+                     f"Approximate response length: {char_count} characters"
+                 )
1051
+
1052
+ return result
1053
+
1054
+ # Execute with auto-download retry logic
1055
+ try:
1056
+ return _make_request()
1057
+ except (requests.exceptions.RequestException, LemonadeClientError):
1058
+ # Use helper to handle auto-download and retry
1059
+ return self._execute_with_auto_download(_make_request, model, auto_download)
1060
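+
+     # Usage sketch (editor's note; the prompt is illustrative):
+     #
+     #     client = LemonadeClient(verbose=False)
+     #     resp = client.chat_completions(
+     #         model=DEFAULT_MODEL_NAME,
+     #         messages=[{"role": "user", "content": "Say hello."}],
+     #     )
+     #     print(resp["choices"][0]["message"]["content"])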
+
1061
+ def _stream_chat_completions_with_openai(
1062
+ self,
1063
+ model: str,
1064
+ messages: List[Dict[str, str]],
1065
+ temperature: float = 0.7,
1066
+ max_completion_tokens: int = 1000,
1067
+ stop: Optional[Union[str, List[str]]] = None,
1068
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1069
+ logprobs: Optional[bool] = None,
1070
+ tools: Optional[List[Dict[str, Any]]] = None,
1071
+ auto_download: bool = True,
1072
+ **kwargs,
1073
+ ) -> Generator[Dict[str, Any], None, None]:
1074
+ """
1075
+ Stream chat completions using the OpenAI client.
1076
+
1077
+ Returns chunks in the format:
1078
+ {
1079
+ "id": "...",
1080
+ "object": "chat.completion.chunk",
1081
+ "created": 1742927481,
1082
+ "model": "...",
1083
+ "choices": [{
1084
+ "index": 0,
1085
+ "delta": {
1086
+ "role": "assistant",
1087
+ "content": "..."
1088
+ },
1089
+ "finish_reason": null
1090
+ }]
1091
+ }
1092
+ """
1093
+ # Create a client just for this request
1094
+ client = OpenAI(
1095
+ base_url=self.base_url,
1096
+ api_key="lemonade", # required, but unused
1097
+ timeout=timeout,
1098
+ )
1099
+
1100
+ # Create request parameters
1101
+ request_params = {
1102
+ "model": model,
1103
+ "messages": messages,
1104
+ "temperature": temperature,
1105
+ "max_completion_tokens": max_completion_tokens,
1106
+ "stream": True,
1107
+ **kwargs,
1108
+ }
1109
+
1110
+ if stop:
1111
+ request_params["stop"] = stop
1112
+
1113
+ if logprobs:
1114
+ request_params["logprobs"] = logprobs
1115
+
1116
+ if tools:
1117
+ request_params["tools"] = tools
1118
+
1119
+ try:
1120
+ # Use the client to stream responses
1121
+ self.log.debug(f"Starting streaming chat completion with model: {model}")
1122
+ stream = client.chat.completions.create(**request_params)
1123
+
1124
+ # Convert OpenAI client responses to our format
1125
+ tokens_generated = 0
1126
+ for chunk in stream:
1127
+ tokens_generated += 1
1128
+ # Convert to dict format expected by our API
1129
+ yield {
1130
+ "id": chunk.id,
1131
+ "object": "chat.completion.chunk",
1132
+ "created": chunk.created,
1133
+ "model": chunk.model,
1134
+ "choices": [
1135
+ {
1136
+ "index": choice.index,
1137
+ "delta": {
1138
+ "role": (
1139
+ choice.delta.role
1140
+ if hasattr(choice.delta, "role")
1141
+ and choice.delta.role
1142
+ else None
1143
+ ),
1144
+ "content": (
1145
+ choice.delta.content
1146
+ if hasattr(choice.delta, "content")
1147
+ and choice.delta.content
1148
+ else None
1149
+ ),
1150
+ },
1151
+ "finish_reason": choice.finish_reason,
1152
+ }
1153
+ for choice in chunk.choices
1154
+ ],
1155
+ }
1156
+
1157
+ self.log.debug(
1158
+ f"Completed streaming chat completion. Generated {tokens_generated} tokens."
1159
+ )
1160
+
1161
+ except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
1162
+ error_type = e.__class__.__name__
1163
+ error_msg = str(e)
1164
+
1165
+ # Check if this is a model loading error and auto_download is enabled
1166
+ if auto_download and self._is_model_error(e):
1167
+ self.log.info(
1168
+ f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model}' not loaded, "
1169
+ f"attempting auto-download and load..."
1170
+ )
1171
+ try:
1172
+ # Load model with auto-download (may take hours for large models)
1173
+ self.load_model(model, timeout=60, auto_download=True)
1174
+
1175
+ # Retry streaming
1176
+ self.log.info(
1177
+ f"{_emoji('🔄', '[RETRY]')} Retrying streaming chat completion "
1178
+ f"with model: {model}"
1179
+ )
1180
+ stream = client.chat.completions.create(**request_params)
1181
+
1182
+ tokens_generated = 0
1183
+ for chunk in stream:
1184
+ tokens_generated += 1
1185
+ yield {
1186
+ "id": chunk.id,
1187
+ "object": "chat.completion.chunk",
1188
+ "created": chunk.created,
1189
+ "model": chunk.model,
1190
+ "choices": [
1191
+ {
1192
+ "index": choice.index,
1193
+ "delta": {
1194
+ "role": (
1195
+ choice.delta.role
1196
+ if hasattr(choice.delta, "role")
1197
+ and choice.delta.role
1198
+ else None
1199
+ ),
1200
+ "content": (
1201
+ choice.delta.content
1202
+ if hasattr(choice.delta, "content")
1203
+ and choice.delta.content
1204
+ else None
1205
+ ),
1206
+ },
1207
+ "finish_reason": choice.finish_reason,
1208
+ }
1209
+ for choice in chunk.choices
1210
+ ],
1211
+ }
1212
+
1213
+ self.log.debug(
1214
+ f"Completed streaming chat completion. Generated {tokens_generated} tokens."
1215
+ )
1216
+ return
1217
+
1218
+ except Exception as load_error:
1219
+ self.log.error(f"Auto-download/load failed: {load_error}")
1220
+ raise LemonadeClientError(
1221
+ f"Model '{model}' not loaded and auto-load failed: {load_error}"
1222
+ )
1223
+
1224
+             # Wrap and surface the original error
1225
+ self.log.error(f"OpenAI {error_type}: {error_msg}")
1226
+ raise LemonadeClientError(f"OpenAI {error_type}: {error_msg}")
1227
+ except Exception as e:
1228
+ self.log.error(f"Error using OpenAI client for streaming: {str(e)}")
1229
+ raise LemonadeClientError(f"Streaming request failed: {str(e)}")
1230
+
1231
+ def completions(
1232
+ self,
1233
+ model: str,
1234
+ prompt: str,
1235
+ temperature: float = 0.7,
1236
+ max_tokens: int = 1000,
1237
+ stop: Optional[Union[str, List[str]]] = None,
1238
+ stream: bool = False,
1239
+ echo: bool = False,
1240
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1241
+ logprobs: Optional[bool] = None,
1242
+ auto_download: bool = True,
1243
+ **kwargs,
1244
+ ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
1245
+ """
1246
+ Call the completions endpoint.
1247
+
1248
+ If the model is not loaded, it will be automatically downloaded and loaded.
1249
+
1250
+ Args:
1251
+ model: The model to use for completion
1252
+ prompt: The prompt to generate a completion for
1253
+ temperature: Controls randomness (higher = more random)
1254
+ max_tokens: Maximum number of tokens to generate (including input tokens)
1255
+ stop: Sequences where generation should stop
1256
+ stream: Whether to stream the response
1257
+ echo: Whether to include the prompt in the response
1258
+ timeout: Request timeout in seconds
1259
+ logprobs: Whether to include log probabilities
1260
+ auto_download: Automatically download model if not available (default: True)
1261
+ **kwargs: Additional parameters to pass to the API
1262
+
1263
+ Returns:
1264
+ For non-streaming: Dict with completion data
1265
+ For streaming: Generator yielding completion chunks
1266
+
1267
+ Example response:
1268
+ {
1269
+ "id": "0",
1270
+ "object": "text_completion",
1271
+ "created": 1742927481,
1272
+ "model": "model-name",
1273
+ "choices": [{
1274
+ "index": 0,
1275
+ "text": "Response text here",
1276
+ "finish_reason": "stop"
1277
+ }]
1278
+ }
1279
+ """
1280
+ # Use the OpenAI client for streaming if requested
1281
+ if stream:
1282
+ return self._stream_completions_with_openai(
1283
+ model=model,
1284
+ prompt=prompt,
1285
+ temperature=temperature,
1286
+ max_tokens=max_tokens,
1287
+ stop=stop,
1288
+ echo=echo,
1289
+ timeout=timeout,
1290
+ logprobs=logprobs,
1291
+ auto_download=auto_download,
1292
+ **kwargs,
1293
+ )
1294
+
1295
+ # Note: self.base_url already includes /api/v1
1296
+ url = f"{self.base_url}/completions"
1297
+ data = {
1298
+ "model": model,
1299
+ "prompt": prompt,
1300
+ "temperature": temperature,
1301
+ "max_tokens": max_tokens,
1302
+ "stream": stream,
1303
+ "echo": echo,
1304
+ **kwargs,
1305
+ }
1306
+
1307
+ if stop:
1308
+ data["stop"] = stop
1309
+
1310
+ if logprobs:
1311
+ data["logprobs"] = logprobs
1312
+
1313
+ # Helper function for the actual API call
1314
+ def _make_request():
1315
+ self.log.debug(f"Sending text completion request to model: {model}")
1316
+ response = requests.post(
1317
+ url,
1318
+ json=data,
1319
+ headers={"Content-Type": "application/json"},
1320
+ timeout=timeout,
1321
+ )
1322
+
1323
+ if response.status_code != 200:
1324
+ error_msg = f"Error in completions (status {response.status_code}): {response.text}"
1325
+ self.log.error(error_msg)
1326
+ raise LemonadeClientError(error_msg)
1327
+
1328
+ result = response.json()
1329
+ if "choices" in result and len(result["choices"]) > 0:
1330
+                 char_count = len(result["choices"][0].get("text", ""))
+                 self.log.debug(
+                     f"Text completion successful. "
+                     f"Approximate response length: {char_count} characters"
+                 )
1335
+
1336
+ return result
1337
+
1338
+ # Execute with auto-download retry logic
1339
+ try:
1340
+ return _make_request()
1341
+ except (requests.exceptions.RequestException, LemonadeClientError):
1342
+ # Use helper to handle auto-download and retry
1343
+ return self._execute_with_auto_download(_make_request, model, auto_download)
1344
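+
+     # Usage sketch (editor's note): the streaming variant of completions();
+     # chunk shape follows the docstring example above.
+     #
+     #     for chunk in client.completions(
+     #         model=DEFAULT_MODEL_NAME, prompt="Once upon a time", stream=True
+     #     ):
+     #         print(chunk["choices"][0]["text"], end="")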
+
1345
+ def _stream_completions_with_openai(
1346
+ self,
1347
+ model: str,
1348
+ prompt: str,
1349
+ temperature: float = 0.7,
1350
+ max_tokens: int = 1000,
1351
+ stop: Optional[Union[str, List[str]]] = None,
1352
+ echo: bool = False,
1353
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1354
+         logprobs: Optional[bool] = None,
+         # Accepted for parity with the chat streaming helper; keeps the
+         # auto_download kwarg passed by completions() out of **kwargs so it
+         # is never forwarded to the OpenAI client, which rejects unknown
+         # keyword arguments.
+         auto_download: bool = True,
+         **kwargs,
1356
+ ) -> Generator[Dict[str, Any], None, None]:
1357
+ """
1358
+ Stream completions using the OpenAI client.
1359
+
1360
+ Returns chunks in the format:
1361
+ {
1362
+ "id": "...",
1363
+ "object": "text_completion",
1364
+ "created": 1742927481,
1365
+ "model": "...",
1366
+ "choices": [{
1367
+ "index": 0,
1368
+ "text": "...",
1369
+ "finish_reason": null
1370
+ }]
1371
+ }
1372
+ """
1373
+ client = OpenAI(
1374
+ base_url=self.base_url,
1375
+ api_key="lemonade", # required, but unused
1376
+ timeout=timeout,
1377
+ )
1378
+
1379
+ try:
1380
+ self.log.debug(f"Starting streaming text completion with model: {model}")
1381
+ # Create request parameters
1382
+ request_params = {
1383
+ "model": model,
1384
+ "prompt": prompt,
1385
+ "temperature": temperature,
1386
+ "max_tokens": max_tokens,
1387
+ "stop": stop,
1388
+ "echo": echo,
1389
+ "stream": True,
1390
+ **kwargs,
1391
+ }
1392
+
1393
+ if logprobs is not None:
1394
+ request_params["logprobs"] = logprobs
1395
+
1396
+ response = client.completions.create(**request_params)
1397
+
1398
+ tokens_generated = 0
1399
+ for chunk in response:
1400
+ tokens_generated += 1
1401
+ yield chunk.model_dump()
1402
+
1403
+ self.log.debug(
1404
+ f"Completed streaming text completion. Generated {tokens_generated} tokens."
1405
+ )
1406
+
1407
+ except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
1408
+ error_type = e.__class__.__name__
1409
+ self.log.error(f"OpenAI {error_type}: {str(e)}")
1410
+ raise LemonadeClientError(f"OpenAI {error_type}: {str(e)}")
1411
+ except Exception as e:
1412
+ self.log.error(f"Error in OpenAI completion streaming: {str(e)}")
1413
+ raise LemonadeClientError(f"Error in OpenAI completion streaming: {str(e)}")
1414
+
1415
+ def embeddings(
1416
+ self,
1417
+ input_texts: Union[str, List[str]],
1418
+ model: Optional[str] = None,
1419
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1420
+ ) -> Dict[str, Any]:
1421
+ """
1422
+ Generate embeddings for input text(s) using Lemonade server.
1423
+
1424
+ Args:
1425
+ input_texts: Single string or list of strings to embed
1426
+ model: Embedding model to use (defaults to self.model or nomic-embed-text-v2)
1427
+ timeout: Request timeout in seconds
1428
+
1429
+ Returns:
1430
+ Dict with 'data' containing list of embedding vectors
1431
+ """
1432
+ try:
1433
+ # Ensure input is a list
1434
+ if isinstance(input_texts, str):
1435
+ input_texts = [input_texts]
1436
+
1437
+ # Use specified model or default
1438
+ embedding_model = model or self.model or "nomic-embed-text-v2"
1439
+
1440
+ payload = {"model": embedding_model, "input": input_texts}
1441
+
1442
+ url = f"{self.base_url}/embeddings"
1443
+             response = self._send_request("post", url, data=payload, timeout=timeout)
1444
+
1445
+ return response
1446
+
1447
+ except Exception as e:
1448
+ self.log.error(f"Error generating embeddings: {str(e)}")
1449
+ raise LemonadeClientError(f"Error generating embeddings: {str(e)}")
1450
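+
+     # Usage sketch (editor's note; assumes an embedding model is available
+     # and the OpenAI-compatible response shape with per-item "embedding"):
+     #
+     #     resp = client.embeddings(["first text", "second text"])
+     #     vectors = [item["embedding"] for item in resp["data"]]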
+
1451
+ def list_models(self, show_all: bool = False) -> Dict[str, Any]:
1452
+ """
1453
+ List available models from the server.
1454
+
1455
+ Args:
1456
+ show_all: If True, returns full catalog including models not yet downloaded.
1457
+ If False (default), returns only downloaded models.
1458
+ When True, response includes additional fields:
1459
+ - name: Human-readable model name
1460
+ - downloaded: Boolean indicating local availability
1461
+ - labels: Array of descriptive tags (e.g., "hot", "cpu", "hybrid")
1462
+
1463
+ Returns:
1464
+ Dict containing the list of available models
1465
+
1466
+ Examples:
1467
+ # List only downloaded models
1468
+ downloaded = client.list_models()
1469
+
1470
+ # List full catalog for model discovery
1471
+ all_models = client.list_models(show_all=True)
1472
+ available = [m for m in all_models["data"] if not m.get("downloaded")]
1473
+ """
1474
+ url = f"{self.base_url}/models"
1475
+ if show_all:
1476
+ url += "?show_all=true"
1477
+ return self._send_request("get", url)
1478
+
1479
+ def get_model_details(self, model_id: str) -> Dict[str, Any]:
1480
+ """
1481
+ Get detailed information about a specific model.
1482
+
1483
+ Args:
1484
+ model_id: The model identifier (e.g., "Qwen3-Coder-30B-GGUF")
1485
+
1486
+ Returns:
1487
+ Dict containing model metadata:
1488
+ - id: Model identifier
1489
+ - created: Unix timestamp
1490
+ - object: Always "model"
1491
+ - owned_by: Attribution field
1492
+ - checkpoint: HuggingFace checkpoint reference
1493
+ - recipe: Framework/device specification (e.g., "oga-cpu", "oga-hybrid")
1494
+
1495
+ Raises:
1496
+ LemonadeClientError: If model not found (404 error)
1497
+
1498
+ Examples:
1499
+ # Get model checkpoint and recipe
1500
+ model = client.get_model_details("Qwen3-Coder-30B-GGUF")
1501
+ print(f"Checkpoint: {model['checkpoint']}")
1502
+ print(f"Recipe: {model['recipe']}")
1503
+
1504
+ # Verify model exists before loading
1505
+ try:
1506
+ details = client.get_model_details(model_name)
1507
+ client.load_model(model_name)
1508
+ except LemonadeClientError as e:
1509
+ print(f"Model not found: {e}")
1510
+ """
1511
+ url = f"{self.base_url}/models/{model_id}"
1512
+ return self._send_request("get", url)
1513
+
1514
+ def pull_model(
1515
+ self,
1516
+ model_name: str,
1517
+ checkpoint: Optional[str] = None,
1518
+ recipe: Optional[str] = None,
1519
+ reasoning: Optional[bool] = None,
1520
+ mmproj: Optional[str] = None,
1521
+ timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
1522
+ ) -> Dict[str, Any]:
1523
+ """
1524
+ Install a model on the server.
1525
+
1526
+ Args:
1527
+ model_name: Model name to install
1528
+ checkpoint: HuggingFace checkpoint to install (for registering new models)
1529
+ recipe: Lemonade API recipe to load the model with (for registering new models)
1530
+ reasoning: Whether the model is a reasoning model (for registering new models)
1531
+ mmproj: Multimodal Projector file for vision models (for registering new models)
1532
+ timeout: Request timeout in seconds (longer for model installation)
1533
+
1534
+ Returns:
1535
+ Dict containing the status of the pull operation
1536
+
1537
+ Raises:
1538
+ LemonadeClientError: If the model installation fails
1539
+ """
1540
+ self.log.info(f"Installing {model_name}")
1541
+
1542
+ request_data = {"model_name": model_name}
1543
+
1544
+ if checkpoint:
1545
+ request_data["checkpoint"] = checkpoint
1546
+ if recipe:
1547
+ request_data["recipe"] = recipe
1548
+ if reasoning is not None:
1549
+ request_data["reasoning"] = reasoning
1550
+ if mmproj:
1551
+ request_data["mmproj"] = mmproj
1552
+
1553
+ url = f"{self.base_url}/pull"
1554
+ try:
1555
+ response = self._send_request("post", url, request_data, timeout=timeout)
1556
+ self.log.info(f"Installed {model_name} successfully: response={response}")
1557
+ return response
1558
+ except Exception as e:
1559
+ message = f"Failed to install {model_name}: {e}"
1560
+ self.log.error(message)
1561
+ raise LemonadeClientError(message)
1562
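+
+     # Usage sketch (editor's note; the model name, checkpoint, and recipe
+     # values below are hypothetical):
+     #
+     #     client.pull_model(
+     #         "my-model-GGUF",
+     #         checkpoint="some-org/My-Model-GGUF",
+     #         recipe="oga-cpu",
+     #     )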
+
1563
+ def pull_model_stream(
1564
+ self,
1565
+ model_name: str,
1566
+ checkpoint: Optional[str] = None,
1567
+ recipe: Optional[str] = None,
1568
+ reasoning: Optional[bool] = None,
1569
+ vision: Optional[bool] = None,
1570
+ embedding: Optional[bool] = None,
1571
+ reranking: Optional[bool] = None,
1572
+ mmproj: Optional[str] = None,
1573
+ timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
1574
+ progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
1575
+ ) -> Generator[Dict[str, Any], None, None]:
1576
+ """
1577
+ Install a model on the server with streaming progress updates.
1578
+
1579
+ This method streams Server-Sent Events (SSE) during the download,
1580
+ providing real-time progress information.
1581
+
1582
+ Args:
1583
+ model_name: Model name to install
1584
+ checkpoint: HuggingFace checkpoint to install (for registering new models)
1585
+ recipe: Lemonade API recipe to load the model with (for registering new models)
1586
+ reasoning: Whether the model is a reasoning model (for registering new models)
1587
+ vision: Whether the model has vision capabilities (for registering new models)
1588
+ embedding: Whether the model is an embedding model (for registering new models)
1589
+ reranking: Whether the model is a reranking model (for registering new models)
1590
+ mmproj: Multimodal Projector file for vision models (for registering new models)
1591
+ timeout: Request timeout in seconds (longer for model installation)
1592
+ progress_callback: Optional callback function called with progress dict on each event.
1593
+ Signature: callback(event_type: str, data: dict) -> None
1594
+ event_type is one of: "progress", "complete", "error"
1595
+
1596
+ Yields:
1597
+ Dict containing progress event data with fields:
1598
+ - For "progress" events: file, file_index, total_files, bytes_downloaded,
1599
+ bytes_total, percent
1600
+ - For "complete" events: file_index, total_files, percent (100)
1601
+ - For "error" events: error message
1602
+
1603
+ Raises:
1604
+ LemonadeClientError: If the model installation fails
1605
+
1606
+ Example:
1607
+ # Using as generator
1608
+ for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
1609
+ if event.get("event") == "progress":
1610
+ print(f"Downloading: {event['percent']}%")
1611
+
1612
+ # Using with callback
1613
+ def on_progress(event_type, data):
1614
+ if event_type == "progress":
1615
+ print(f"{data['file']}: {data['percent']}%")
1616
+
1617
+ for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
1618
+ pass
1619
+ """
1620
+ self.log.info(f"Installing {model_name} with streaming progress")
1621
+
1622
+ request_data = {"model_name": model_name, "stream": True}
1623
+
1624
+ if checkpoint:
1625
+ request_data["checkpoint"] = checkpoint
1626
+ if recipe:
1627
+ request_data["recipe"] = recipe
1628
+ if reasoning is not None:
1629
+ request_data["reasoning"] = reasoning
1630
+ if vision is not None:
1631
+ request_data["vision"] = vision
1632
+ if embedding is not None:
1633
+ request_data["embedding"] = embedding
1634
+ if reranking is not None:
1635
+ request_data["reranking"] = reranking
1636
+ if mmproj:
1637
+ request_data["mmproj"] = mmproj
1638
+
1639
+ url = f"{self.base_url}/pull"
1640
+
1641
+ try:
1642
+ response = requests.post(
1643
+ url,
1644
+ json=request_data,
1645
+ headers={"Content-Type": "application/json"},
1646
+ timeout=timeout,
1647
+ stream=True,
1648
+ )
1649
+
1650
+ if response.status_code != 200:
1651
+ error_msg = f"Error pulling model (status {response.status_code}): {response.text}"
1652
+ self.log.error(error_msg)
1653
+ raise LemonadeClientError(error_msg)
1654
+
1655
+ # Parse SSE stream
1656
+ event_type = None
1657
+ received_complete = False
1658
+ try:
1659
+ for line in response.iter_lines(decode_unicode=True):
1660
+ if not line:
1661
+ continue
1662
+
1663
+ if line.startswith("event:"):
1664
+ event_type = line[6:].strip()
1665
+ elif line.startswith("data:"):
1666
+ data_str = line[5:].strip()
1667
+ try:
1668
+ data = json.loads(data_str)
1669
+ data["event"] = event_type or "progress"
1670
+
1671
+ # Call the progress callback if provided
1672
+ if progress_callback:
1673
+ progress_callback(event_type or "progress", data)
1674
+
1675
+ yield data
1676
+
1677
+ # Track complete event
1678
+ if event_type == "complete":
1679
+ received_complete = True
1680
+
1681
+ # Check for error event
1682
+ if event_type == "error":
1683
+ error_msg = data.get(
1684
+ "error", "Unknown error during model pull"
1685
+ )
1686
+ raise LemonadeClientError(error_msg)
1687
+
1688
+ except json.JSONDecodeError:
1689
+ self.log.warning(f"Failed to parse SSE data: {data_str}")
1690
+ continue
1691
+ except requests.exceptions.ChunkedEncodingError:
1692
+ # Connection closed by server - this is normal after complete event
1693
+ if not received_complete:
1694
+ raise
1695
+
1696
+ self.log.info(f"Installed {model_name} successfully via streaming")
1697
+
1698
+ except requests.exceptions.RequestException as e:
1699
+ message = f"Failed to install {model_name}: {e}"
1700
+ self.log.error(message)
1701
+ raise LemonadeClientError(message)
1702
+
1703
+ def delete_model(
1704
+ self,
1705
+ model_name: str,
1706
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1707
+ ) -> Dict[str, Any]:
1708
+ """
1709
+ Delete a model from the server.
1710
+
1711
+ Args:
1712
+ model_name: Model name to delete
1713
+ timeout: Request timeout in seconds
1714
+
1715
+ Returns:
1716
+ Dict containing the status of the delete operation
1717
+
1718
+ Raises:
1719
+ LemonadeClientError: If the model deletion fails
1720
+ """
1721
+ self.log.info(f"Deleting {model_name}")
1722
+
1723
+ request_data = {"model_name": model_name}
1724
+
1725
+ url = f"{self.base_url}/delete"
1726
+ try:
1727
+ response = self._send_request("post", url, request_data, timeout=timeout)
1728
+ self.log.info(f"Deleted {model_name} successfully: response={response}")
1729
+ return response
1730
+ except Exception as e:
1731
+ message = f"Failed to delete {model_name}: {e}"
1732
+ self.log.error(message)
1733
+ raise LemonadeClientError(message)
1734
+
1735
+ def ensure_model_downloaded(
1736
+ self,
1737
+ model_name: str,
1738
+ show_progress: bool = True,
1739
+ timeout: int = 7200,
1740
+ ) -> bool:
1741
+ """
1742
+ Ensure a model is downloaded, downloading if necessary.
1743
+
1744
+ This method checks if the model is available on the server,
1745
+ and if not, downloads it via the /api/v1/pull endpoint.
1746
+
1747
+ Large models can be 100GB+ and take hours to download on typical connections.
1748
+
1749
+ Args:
1750
+ model_name: Model name to ensure is downloaded
1751
+ show_progress: Show progress messages during download
1752
+ timeout: Download timeout in seconds (default: 7200 = 2 hours)
1753
+
1754
+ Returns:
1755
+ True if model is available (was already downloaded or successfully downloaded),
1756
+ False if download failed
1757
+
1758
+ Example:
1759
+ client = LemonadeClient()
1760
+ if client.ensure_model_downloaded("Qwen3-0.6B-GGUF"):
1761
+ client.load_model("Qwen3-0.6B-GGUF")
1762
+ """
1763
+ try:
1764
+ # Check if model is already downloaded
1765
+ models_response = self.list_models()
1766
+ for model in models_response.get("data", []):
1767
+ if model.get("id") == model_name:
1768
+ if model.get("downloaded", False):
1769
+ if show_progress:
1770
+ self.log.info(
1771
+ f"{_emoji('✅', '[OK]')} Model already downloaded: {model_name}"
1772
+ )
1773
+ return True
1774
+
1775
+ # Model not downloaded - attempt download
1776
+ if show_progress:
1777
+ self.log.info(
1778
+ f"{_emoji('📥', '[DOWNLOADING]')} Downloading model: {model_name}"
1779
+ )
1780
+ self.log.info(
1781
+ " This may take minutes to hours depending on model size..."
1782
+ )
1783
+
1784
+ # Download via pull_model
1785
+ self.pull_model(model_name, timeout=timeout)
1786
+
1787
+ # Use the centralized download waiter
1788
+ return self._wait_for_model_download(
1789
+ model_name, timeout=timeout, show_progress=show_progress
1790
+ )
1791
+
1792
+ except Exception as e:
1793
+ self.log.error(f"Failed to ensure model downloaded: {e}")
1794
+ return False
1795
+
1796
+ def responses(
1797
+ self,
1798
+ model: str,
1799
+ input: Union[str, List[Dict[str, str]]],
1800
+ temperature: float = 0.7,
1801
+ max_output_tokens: Optional[int] = None,
1802
+ stream: bool = False,
1803
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
1804
+ **kwargs,
1805
+ ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
1806
+ """
1807
+ Call the responses endpoint.
1808
+
1809
+ Args:
1810
+ model: The model to use for the response
1811
+ input: A string or list of dictionaries input for the model to respond to
1812
+ temperature: Controls randomness (higher = more random)
1813
+ max_output_tokens: Maximum number of output tokens to generate
1814
+ stream: Whether to stream the response
1815
+ timeout: Request timeout in seconds
1816
+ **kwargs: Additional parameters to pass to the API
1817
+
1818
+ Returns:
1819
+ For non-streaming: Dict with response data
1820
+ For streaming: Generator yielding response events
1821
+
1822
+ Example response (non-streaming):
1823
+ {
1824
+ "id": "0",
1825
+ "created_at": 1746225832.0,
1826
+ "model": "model-name",
1827
+ "object": "response",
1828
+ "output": [{
1829
+ "id": "0",
1830
+ "content": [{
1831
+ "annotations": [],
1832
+ "text": "Response text here"
1833
+ }]
1834
+ }]
1835
+ }
1836
+ """
1837
+ # Note: self.base_url already includes /api/v1
1838
+ url = f"{self.base_url}/responses"
1839
+ data = {
1840
+ "model": model,
1841
+ "input": input,
1842
+ "temperature": temperature,
1843
+ "stream": stream,
1844
+ **kwargs,
1845
+ }
1846
+
1847
+ if max_output_tokens:
1848
+ data["max_output_tokens"] = max_output_tokens
1849
+
1850
+ try:
1851
+ self.log.debug(f"Sending responses request to model: {model}")
1852
+ response = requests.post(
1853
+ url,
1854
+ json=data,
1855
+ headers={"Content-Type": "application/json"},
1856
+                 timeout=timeout,
+                 # Stream the body so server-sent events can be consumed
+                 # incrementally instead of being buffered in full
+                 stream=stream,
+             )
1858
+
1859
+ if response.status_code != 200:
1860
+ error_msg = f"Error in responses (status {response.status_code}): {response.text}"
1861
+ self.log.error(error_msg)
1862
+ raise LemonadeClientError(error_msg)
1863
+
1864
+ if stream:
1865
+ # For streaming responses, we need to handle server-sent events
1866
+ # This is a simplified implementation - full SSE parsing might be needed
1867
+ return self._parse_sse_stream(response)
1868
+ else:
1869
+ result = response.json()
1870
+ if "output" in result and len(result["output"]) > 0:
1871
+ content = result["output"][0].get("content", [])
1872
+ if content and len(content) > 0:
1873
+ text_length = len(content[0].get("text", ""))
1874
+ self.log.debug(
1875
+ f"Response successful. "
1876
+ f"Approximate response length: {text_length} characters"
1877
+ )
1878
+ return result
1879
+
1880
+ except requests.exceptions.RequestException as e:
1881
+ self.log.error(f"Request failed: {str(e)}")
1882
+ raise LemonadeClientError(f"Request failed: {str(e)}")
1883
+
1884
+    def _parse_sse_stream(self, response) -> Generator[Dict[str, Any], None, None]:
+        """
+        Parse server-sent events from the streaming responses endpoint.
+
+        This is a simplified implementation that may need enhancement
+        for full SSE specification compliance.
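+
+        Expected wire format (illustrative; the exact event fields depend on
+        the server):
+            data: {"delta": "partial text"}
+            data: [DONE]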
+        """
+        for line in response.iter_lines(decode_unicode=True):
+            if line.startswith("data: "):
+                try:
+                    data = line[6:]  # Remove "data: " prefix
+                    if data.strip() == "[DONE]":
+                        break
+                    yield json.loads(data)
+                except json.JSONDecodeError:
+                    continue
+
+    def _wait_for_model_download(
+        self,
+        model_name: str,
+        timeout: int = 7200,
+        show_progress: bool = True,
+        download_task: Optional[DownloadTask] = None,
+    ) -> bool:
+        """
+        Wait for a model download to complete by polling the models endpoint.
+
+        Large models (up to 100GB) can take hours to download on typical connections:
+        - 100GB @ 100Mbps = ~2-3 hours
+        - 100GB @ 1Gbps = ~15-20 minutes
+
+        Args:
+            model_name: Model name to wait for
+            timeout: Maximum time to wait in seconds (default: 7200 = 2 hours)
+            show_progress: Show progress messages
+            download_task: Optional DownloadTask for cancellation support
+
+        Returns:
+            True if model download completed, False if timeout or error
+
+        Raises:
+            ModelDownloadCancelledError: If download is cancelled
+        """
+        poll_interval = 30  # Check every 30 seconds for large downloads
+        elapsed = 0
+
+        while elapsed < timeout:
+            # Check for cancellation
+            if download_task and download_task.is_cancelled():
+                if show_progress:
+                    self.log.warning(
+                        f"{_emoji('🚫', '[CANCELLED]')} Download cancelled for {model_name}"
+                    )
+                raise ModelDownloadCancelledError(f"Download cancelled: {model_name}")
+
+            time.sleep(poll_interval)
+            elapsed += poll_interval
+
+            try:
+                # Check if model is now downloaded
+                models_response = self.list_models()
+                for model in models_response.get("data", []):
+                    if model.get("id") == model_name:
+                        if model.get("downloaded", False):
+                            if show_progress:
+                                minutes = elapsed // 60
+                                seconds = elapsed % 60
+                                self.log.info(
+                                    f"{_emoji('✅', '[OK]')} Model downloaded successfully: "
+                                    f"{model_name} ({minutes}m {seconds}s)"
+                                )
+                            return True
+
+                if show_progress and elapsed % 60 == 0:  # Show every 60s
+                    minutes = elapsed // 60
+                    self.log.info(
+                        f" {_emoji('⏳', '[WAIT]')} Downloading... {minutes} minutes elapsed"
+                    )
+            except ModelDownloadCancelledError:
+                raise  # Re-raise cancellation
+            except Exception as e:
+                self.log.warning(f"Error checking download status: {e}")
+
+        # Timeout reached
+        if show_progress:
+            minutes = timeout // 60
+            self.log.warning(
+                f"{_emoji('⏰', '[TIMEOUT]')} Download timeout ({minutes} minutes) "
+                f"reached for {model_name}"
+            )
+        return False
+
+    def load_model(
+        self,
+        model_name: str,
+        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
+        auto_download: bool = False,
+        download_timeout: int = 7200,
+        llamacpp_args: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Load a model on the server.
+
+        If auto_download is enabled and the model is not available:
+        1. Prompts user for confirmation (with size and ETA)
+        2. Validates disk space
+        3. Downloads model with cancellation support
+        4. Retries loading
+
+        Args:
+            model_name: Model name to load
+            timeout: Request timeout in seconds (longer for model loading)
+            auto_download: If True, automatically download the model if not available
+            download_timeout: Timeout for model download in seconds (default: 7200 = 2 hours)
+                Large models can be 100GB+ and take hours to download
+            llamacpp_args: Optional llama.cpp arguments (e.g., "--ubatch-size 2048").
+                Used to configure model loading parameters like batch sizes.
+
+        Returns:
+            Dict containing the status of the load operation
+
+        Raises:
+            ModelDownloadCancelledError: If user declines download or cancels
+            InsufficientDiskSpaceError: If not enough disk space
+            LemonadeClientError: If model loading fails
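+
+        Example (illustrative; the model name is a placeholder from the
+        server's catalog):
+            client.load_model(
+                "Qwen3-Coder-30B-A3B-Instruct-GGUF", auto_download=True
+            )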
+        """
+        self.log.info(f"Loading {model_name}")
+
+        request_data = {"model_name": model_name}
+        if llamacpp_args:
+            request_data["llamacpp_args"] = llamacpp_args
+        url = f"{self.base_url}/load"
+
+        try:
+            response = self._send_request("post", url, request_data, timeout=timeout)
+            self.log.info(f"Loaded {model_name} successfully: response={response}")
+            self.model = model_name
+            return response
+        except Exception as e:
+            original_error = str(e)
+
+            # Check if this is a "model not found" error and auto_download is enabled
+            if not (auto_download and self._is_model_error(e)):
+                # Not a model error or auto_download disabled - re-raise
+                self.log.error(f"Failed to load {model_name}: {original_error}")
+                if isinstance(e, LemonadeClientError):
+                    raise
+                raise LemonadeClientError(
+                    f"Failed to load {model_name}: {original_error}"
+                )
+
+            # Auto-download flow
+            self.log.info(
+                f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model_name}' not found, "
+                f"initiating auto-download..."
+            )
+
+            # Get model info and size estimate
+            model_info = self.get_model_info(model_name)
+            size_gb = model_info["size_gb"]
+            estimated_minutes = self._estimate_download_time(size_gb)
+
+            # Prompt user for confirmation
+            if not _prompt_user_for_download(model_name, size_gb, estimated_minutes):
+                raise ModelDownloadCancelledError(
+                    f"User declined download of {model_name}"
+                )
+
+            # Validate disk space
+            _check_disk_space(size_gb)
+
+            # Create and track download task
+            download_task = DownloadTask(model_name=model_name, size_gb=size_gb)
+            with self._downloads_lock:
+                self.active_downloads[model_name] = download_task
+
+            try:
+                # Trigger model download
+                self.pull_model(model_name, timeout=download_timeout)
+
+                # Wait for download to complete (with cancellation support)
+                self.log.info(
+                    f" {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
+                )
+                self.log.info(
+                    f" {_emoji('💡', '[TIP]')} Tip: You can cancel with "
+                    f"client.cancel_download(model_name)"
+                )
+
+                if self._wait_for_model_download(
+                    model_name,
+                    timeout=download_timeout,
+                    show_progress=True,
+                    download_task=download_task,
+                ):
+                    # Retry loading after successful download
+                    self.log.info(
+                        f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
+                    )
+                    response = self._send_request(
+                        "post", url, request_data, timeout=timeout
+                    )
+                    self.log.info(
+                        f"{_emoji('✅', '[OK]')} Loaded {model_name} successfully after download"
+                    )
+                    self.model = model_name
+                    return response
+                else:
+                    raise LemonadeClientError(
+                        f"Model download timed out for '{model_name}'"
+                    )
+
+            except ModelDownloadCancelledError:
+                self.log.warning(f"Download cancelled for {model_name}")
+                raise
+            except InsufficientDiskSpaceError:
+                self.log.error(f"Insufficient disk space for {model_name}")
+                raise
+            except Exception as download_error:
+                self.log.error(f"Auto-download failed: {download_error}")
+                raise LemonadeClientError(
+                    f"Failed to auto-download '{model_name}': {download_error}"
+                )
+            finally:
+                # Clean up download task
+                with self._downloads_lock:
+                    self.active_downloads.pop(model_name, None)
+
+    def unload_model(self) -> Dict[str, Any]:
+        """
+        Unload the current model from the server.
+
+        Returns:
+            Dict containing the status of the unload operation
+        """
+        url = f"{self.base_url}/unload"
+        response = self._send_request("post", url)
+        self.model = None
+        self.log.info(f"Model unloaded successfully: {response}")
+        return response
+
+    def set_params(
+        self,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        min_length: Optional[int] = None,
+        max_length: Optional[int] = None,
+        do_sample: Optional[bool] = None,
+    ) -> Dict[str, Any]:
+        """
+        Set generation parameters for text completion.
+
+        Args:
+            temperature: Controls randomness (higher = more random)
+            top_p: Controls diversity via nucleus sampling
+            top_k: Controls diversity by limiting to the k most likely tokens
+            min_length: Minimum length of generated text in tokens
+            max_length: Maximum length of generated text in tokens
+            do_sample: Whether to use sampling or greedy decoding
+
+        Returns:
+            Dict containing the status and updated parameters
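+
+        Example (illustrative):
+            client.set_params(temperature=0.2, top_p=0.9, do_sample=True)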
+        """
+        request_data = {}
+
+        if temperature is not None:
+            request_data["temperature"] = temperature
+        if top_p is not None:
+            request_data["top_p"] = top_p
+        if top_k is not None:
+            request_data["top_k"] = top_k
+        if min_length is not None:
+            request_data["min_length"] = min_length
+        if max_length is not None:
+            request_data["max_length"] = max_length
+        if do_sample is not None:
+            request_data["do_sample"] = do_sample
+
+        url = f"{self.base_url}/params"
+        return self._send_request("post", url, request_data)
+
+    def health_check(self) -> Dict[str, Any]:
+        """
+        Check server health.
+
+        Returns:
+            Dict containing the server status and loaded model
+
+        Raises:
+            LemonadeClientError: If the health check fails
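+
+        Example (illustrative):
+            if client.health_check().get("status") == "ok":
+                print("Server is up")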
+        """
+        url = f"{self.base_url}/health"
+        return self._send_request("get", url)
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        Get performance statistics from the last request.
+
+        Returns:
+            Dict containing performance statistics
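+
+        Example (illustrative; the exact keys depend on the server version):
+            stats = client.get_stats()
+            print(stats)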
+        """
+        url = f"{self.base_url}/stats"
+        return self._send_request("get", url)
+
+    def get_system_info(self, verbose: bool = False) -> Dict[str, Any]:
+        """
+        Get system hardware information and device enumeration.
+
+        Args:
+            verbose: If True, returns additional details like Python packages
+                and extended system information
+
+        Returns:
+            Dict containing system information:
+            - OS Version
+            - Processor details
+            - Physical Memory (RAM)
+            - devices: Dictionary with device information
+                - cpu: Name, cores, threads, availability
+                - gpu: AMD iGPU/dGPU name, memory (MB), driver version, availability
+                - npu: Name, driver version, power mode, availability
+
+        Examples:
+            # Check available devices
+            sysinfo = client.get_system_info()
+            devices = sysinfo.get("devices", {})
+
+            # Select best device
+            if devices.get("npu", {}).get("available"):
+                print("Using NPU for acceleration")
+            elif devices.get("gpu", {}).get("available"):
+                print("Using GPU for acceleration")
+            else:
+                print("Using CPU")
+
+            # Get detailed info
+            detailed = client.get_system_info(verbose=True)
+        """
+        url = f"{self.base_url}/system-info"
+        if verbose:
+            url += "?verbose=true"
+        return self._send_request("get", url)
+
+    def ready(self) -> bool:
+        """
+        Check if the client is ready for use.
+
+        Returns:
+            bool: True if the server is reachable and healthy, False otherwise
+        """
+        try:
+            # Server must respond to the health check and report "ok"
+            health = self.health_check()
+            return health.get("status") == "ok"
+        except Exception:
+            return False
+
+    def validate_context_size(
+        self,
+        required_tokens: int = 32768,
+        quiet: bool = False,
+    ) -> tuple:
+        """
+        Validate that the Lemonade server has sufficient context size.
+
+        Checks the /health endpoint to verify the server's context size
+        meets the required minimum.
+
+        Args:
+            required_tokens: Minimum required context size in tokens (default: 32768)
+            quiet: Suppress output messages
+
+        Returns:
+            Tuple of (success: bool, error_message: Optional[str])
+            - success: True if context size is sufficient
+            - error_message: Description of the issue if validation failed, None if successful
+
+        Example:
+            client = LemonadeClient()
+            success, error = client.validate_context_size(required_tokens=32768)
+            if not success:
+                print(f"Context validation failed: {error}")
+                sys.exit(1)
+        """
+        try:
+            health = self.health_check()
+            reported_ctx = health.get("context_size", 0)
+
+            if reported_ctx >= required_tokens:
+                self.log.debug(
+                    f"Context size validated: {reported_ctx} >= {required_tokens}"
+                )
+                return True, None
+            else:
+                error_msg = (
+                    f"Insufficient context size: server has {reported_ctx} tokens, "
+                    f"but {required_tokens} tokens are required. "
+                    f"Restart with: lemonade-server serve --ctx-size {required_tokens}"
+                )
+                if not quiet:
+                    print(f"❌ {error_msg}")
+                return False, error_msg
+
+        except Exception as e:
+            self.log.warning(f"Context validation failed: {e}")
+            if not quiet:
+                print(f"⚠️ Context validation failed: {e}")
+            return True, None  # Don't block on connection errors
+
+    def get_status(self) -> LemonadeStatus:
+        """
+        Get comprehensive Lemonade status.
+
+        Returns:
+            LemonadeStatus with server status and loaded models
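+
+        Example (illustrative):
+            status = client.get_status()
+            if status.running:
+                print(f"Context size: {status.context_size}")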
+        """
+        status = LemonadeStatus(url=f"http://{self.host}:{self.port}")
+
+        try:
+            health = self.health_check()
+            status.running = True
+            status.health_data = health
+            status.context_size = health.get("context_size", 0)
+
+            # Get loaded models
+            models_response = self.list_models()
+            status.loaded_models = models_response.get("data", [])
+        except Exception as e:
+            self.log.debug(f"Failed to get status: {e}")
+            status.running = False
+            status.error = str(e)
+
+        return status
+
+    def get_agent_profile(self, agent: str) -> Optional[AgentProfile]:
+        """
+        Get agent profile by name.
+
+        Args:
+            agent: Name of the agent (chat, code, rag, talk, blender, etc.)
+
+        Returns:
+            AgentProfile if found, None otherwise
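+
+        Example (illustrative):
+            profile = client.get_agent_profile("chat")
+            if profile:
+                print(profile.display_name, profile.models)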
+        """
+        return AGENT_PROFILES.get(agent.lower())
+
+    def list_agents(self) -> List[str]:
+        """
+        List all available agent profiles.
+
+        Returns:
+            List of agent profile names
+        """
+        return list(AGENT_PROFILES.keys())
+
+    def get_required_models(self, agent: str = "all") -> List[str]:
+        """
+        Get list of model IDs required for an agent or all agents.
+
+        Args:
+            agent: Agent name or "all" for all unique models
+
+        Returns:
+            List of model IDs (e.g., ["Qwen3-Coder-30B-A3B-Instruct-GGUF", ...])
+        """
+        model_ids = set()
+
+        if agent.lower() == "all":
+            # Collect all unique models across all agents
+            for profile in AGENT_PROFILES.values():
+                for model_key in profile.models:
+                    if model_key in MODELS:
+                        model_ids.add(MODELS[model_key].model_id)
+        else:
+            # Get models for the specific agent
+            profile = self.get_agent_profile(agent)
+            if profile:
+                for model_key in profile.models:
+                    if model_key in MODELS:
+                        model_ids.add(MODELS[model_key].model_id)
+
+        return list(model_ids)
+
+    def check_model_available(self, model_id: str) -> bool:
+        """
+        Check if a model is available (downloaded) on the server.
+
+        Args:
+            model_id: Model ID to check
+
+        Returns:
+            True if model is available, False otherwise
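+
+        Example (illustrative; the model ID is a placeholder):
+            model_id = "Qwen3-Coder-30B-A3B-Instruct-GGUF"
+            if not client.check_model_available(model_id):
+                client.pull_model(model_id, timeout=3600)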
+        """
+        try:
+            # Use list_models with show_all=True to get download status
+            models = self.list_models(show_all=True)
+            for model in models.get("data", []):
+                if model.get("id", "").lower() == model_id.lower():
+                    return model.get("downloaded", False)
+        except Exception:
+            pass
+        return False
+
+    def download_agent_models(
+        self,
+        agent: str = "all",
+        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
+        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Download all models required for an agent with streaming progress.
+
+        This method downloads all models needed by an agent (or all agents)
+        and provides real-time progress updates via SSE streaming.
+
+        Args:
+            agent: Agent name (chat, code, rag, etc.) or "all" for all models
+            timeout: Timeout per model in seconds
+            progress_callback: Optional callback for progress updates.
+                Signature: callback(event_type: str, data: dict) -> None
+
+        Returns:
+            Dict with download results:
+            - success: bool - True if all models downloaded
+            - models: List[Dict] - Status for each model
+            - errors: List[str] - Any error messages
+
+        Example:
+            def on_progress(event_type, data):
+                if event_type == "progress":
+                    print(f"{data['file']}: {data['percent']}%")
+
+            result = client.download_agent_models("chat", progress_callback=on_progress)
+        """
+        model_ids = self.get_required_models(agent)
+
+        if not model_ids:
+            return {
+                "success": True,
+                "models": [],
+                "errors": [],
+                "message": f"No models required for agent '{agent}'",
+            }
+
+        results = {"success": True, "models": [], "errors": []}
+
+        for model_id in model_ids:
+            model_result = {"model_id": model_id, "status": "pending", "skipped": False}
+
+            # Check if already available
+            if self.check_model_available(model_id):
+                model_result["status"] = "already_available"
+                model_result["skipped"] = True
+                results["models"].append(model_result)
+                self.log.info(f"Model {model_id} already available, skipping download")
+                continue
+
+            # Download with streaming
+            try:
+                self.log.info(f"Downloading model: {model_id}")
+                completed = False
+
+                for event in self.pull_model_stream(
+                    model_name=model_id,
+                    timeout=timeout,
+                    progress_callback=progress_callback,
+                ):
+                    if event.get("event") == "complete":
+                        completed = True
+                        model_result["status"] = "completed"
+                    elif event.get("event") == "error":
+                        model_result["status"] = "error"
+                        model_result["error"] = event.get("error", "Unknown error")
+                        results["errors"].append(f"{model_id}: {model_result['error']}")
+                        results["success"] = False
+
+                if not completed and model_result["status"] == "pending":
+                    model_result["status"] = "completed"  # No explicit complete event
+
+            except LemonadeClientError as e:
+                model_result["status"] = "error"
+                model_result["error"] = str(e)
+                results["errors"].append(f"{model_id}: {e}")
+                results["success"] = False
+
+            results["models"].append(model_result)
+
+        return results
+
+    def check_model_loaded(self, model_id: str) -> bool:
+        """
+        Check if a specific model is loaded.
+
+        Args:
+            model_id: Model ID to check
+
+        Returns:
+            True if model is loaded, False otherwise
+        """
+        try:
+            models_response = self.list_models()
+            for model in models_response.get("data", []):
+                if model.get("id", "").lower() == model_id.lower():
+                    return True
+                # Also check for partial match
+                if model_id.lower() in model.get("id", "").lower():
+                    return True
+        except Exception:
+            pass
+        return False
+
+    def _check_lemonade_installed(self) -> bool:
+        """
+        Check if lemonade-server is available.
+
+        Checks in this order:
+        1. Try health check on configured URL (LEMONADE_BASE_URL or default)
+        2. If localhost and health check fails, check if binary is in PATH (for auto-start)
+        3. If remote server and health check fails, return False (can't auto-start)
+
+        Returns:
+            True if server is available or can be started, False otherwise
+        """
+        # First, always try health check to see if server is already running
+        try:
+            health = self.health_check()
+            if health.get("status") == "ok":
+                return True
+        except Exception:
+            pass
+
+        # Health check failed - determine if we can auto-start
+        is_localhost = self.host in ("localhost", "127.0.0.1", "::1")
+
+        if is_localhost:
+            # Local server not running - check if binary is installed for auto-start
+            return shutil.which("lemonade-server") is not None
+        else:
+            # Remote server not responding and we can't auto-start it
+            return False
+
+    def get_lemonade_version(self) -> Optional[str]:
+        """
+        Get the installed lemonade-server version.
+
+        Returns:
+            Version string (e.g., "8.2.2") or None if unable to determine
+        """
+        try:
+            result = subprocess.run(
+                ["lemonade-server", "--version"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+                check=False,  # We handle errors by checking the output
+            )
+
+            # Combine stdout and stderr to get complete output
+            full_output = result.stdout + result.stderr
+
+            # Extract version number using regex (e.g., "8.2.2")
+            version_match = re.search(r"(\d+\.\d+(?:\.\d+)?)", full_output)
+            if version_match:
+                return version_match.group(1)
+
+            return None
+
+        except Exception:
+            return None
+
+    def _check_version_compatibility(
+        self, expected_version: str, quiet: bool = False
+    ) -> bool:
+        """
+        Check if the installed lemonade-server version is compatible.
+
+        Checks only the major version for compatibility (e.g., an installed
+        8.1.x is treated as compatible with an expected 8.2.2, but 7.x is not).
+
+        Args:
+            expected_version: Expected version string (e.g., "8.2.2")
+            quiet: Suppress warning output
+
+        Returns:
+            True if compatible (or version check failed), False if incompatible major version
+        """
+        actual_version = self.get_lemonade_version()
+
+        if not actual_version:
+            # Can't determine version, assume compatible (don't block)
+            return True
+
+        try:
+            # Parse versions
+            expected_parts = expected_version.split(".")
+            actual_parts = actual_version.split(".")
+
+            expected_major = int(expected_parts[0])
+            actual_major = int(actual_parts[0])
+
+            if expected_major != actual_major:
+                if not quiet:
+                    print("")
+                    print(
+                        f"{_emoji('⚠️', '[WARN]')} Lemonade Server version mismatch detected!"
+                    )
+                    print(f" Expected major version: {expected_major}.x.x")
+                    print(f" Installed version: {actual_version}")
+                    print("")
+                    print(
+                        " This may cause compatibility issues. "
+                        f"Please install Lemonade Server {expected_version}:"
+                    )
+                    print(" https://lemonade-server.ai")
+                    print("")
+
+                return False
+
+            return True
+
+        except Exception:
+            # If parsing fails, assume compatible (don't block)
+            return True
+
+    def initialize(
+        self,
+        agent: str = "mcp",
+        ctx_size: Optional[int] = None,
+        auto_start: bool = True,
+        timeout: int = 120,
+        verbose: bool = False,  # pylint: disable=unused-argument
+        quiet: bool = False,
+    ) -> LemonadeStatus:
+        """
+        Initialize Lemonade Server for a specific agent.
+
+        This method:
+        1. Checks if lemonade-server is installed
+        2. Checks if server is running (health endpoint)
+        3. Auto-starts with ctx-size=32768 if not running
+        4. Validates context size and shows warning if too small
+
+        With auto-download enabled, models are downloaded on-demand when needed,
+        so we don't validate model availability during initialization.
+
+        Args:
+            agent: Agent name (chat, code, rag, talk, blender, jira, docker, vlm, minimal, mcp)
+            ctx_size: Override context size (default: 32768 for most agents)
+            auto_start: Automatically start server if not running
+            timeout: Timeout in seconds for server startup
+            verbose: Enable verbose output
+            quiet: Suppress output (only errors)
+
+        Returns:
+            LemonadeStatus with server status and loaded models
+
+        Example:
+            client = LemonadeClient()
+            status = client.initialize(agent="chat")
+
+            # Initialize with custom context size
+            status = client.initialize(agent="code", ctx_size=65536)
+        """
+        profile = self.get_agent_profile(agent)
+        if not profile:
+            if not quiet:
+                print(
+                    f"{_emoji('⚠️', '[WARN]')} Unknown agent '{agent}', using 'mcp' profile"
+                )
+            profile = AGENT_PROFILES["mcp"]
+
+        # Use 32768 as default context size for all agents (suitable for most tasks)
+        # User can override with ctx_size parameter if needed
+        required_ctx = ctx_size or 32768
+
+        if not quiet:
+            print(f"🍋 Initializing Lemonade for {profile.display_name}")
+            print(f" Context size: {required_ctx}")
+
+        # Check if lemonade-server is installed
+        if not self._check_lemonade_installed():
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Lemonade Server is not installed")
+                print("")
+                print(f"{_emoji('📥', '[DOWNLOAD]')} Download and install from:")
+                print(" https://lemonade-server.ai")
+                print("")
+                print("GAIA will automatically start Lemonade Server once installed.")
+                print("")
+            status = LemonadeStatus(url=f"http://{self.host}:{self.port}")
+            status.running = False
+            status.error = "Lemonade Server not installed"
+            return status
+
+        # Check version compatibility (warning only, not fatal)
+        from gaia.version import LEMONADE_VERSION
+
+        self._check_version_compatibility(LEMONADE_VERSION, quiet=quiet)
+
+        # Check current status
+        status = self.get_status()
+
+        if status.running:
+            if not quiet:
+                print("✅ Lemonade Server is running")
+                print(f" Current context size: {status.context_size}")
+
+            # Check context size (warning only, not fatal)
+            if status.context_size < required_ctx:
+                if not quiet:
+                    print("")
+                    print(
+                        f"{_emoji('⚠️', '[WARN]')} Context size ({status.context_size}) "
+                        f"is less than recommended ({required_ctx})"
+                    )
+                    print(
+                        f" For better performance, restart with: "
+                        f"lemonade-server serve --ctx-size {required_ctx}"
+                    )
+                    print("")
+
+            return status
+
+        # Server not running
+        if not auto_start:
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Lemonade Server is not running")
+                print(f" Start with: lemonade-server serve --ctx-size {required_ctx}")
+            status.error = "Server not running"
+            return status
+
+        # Auto-start server
+        if not quiet:
+            print(
+                f"{_emoji('🚀', '[START]')} Starting Lemonade Server "
+                f"with ctx-size={required_ctx}..."
+            )
+
+        try:
+            self.launch_server(ctx_size=required_ctx, background="terminal")
+
+            # Wait for server to be ready
+            start_time = time.time()
+            while time.time() - start_time < timeout:
+                try:
+                    health = self.health_check()
+                    if health.get("status") == "ok":
+                        if not quiet:
+                            print(
+                                f"{_emoji('✅', '[OK]')} Lemonade Server started successfully"
+                            )
+                        status = self.get_status()
+                        status.running = True
+                        return status
+                except Exception:
+                    pass
+                time.sleep(2)
+
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Failed to start Lemonade Server")
+            status.error = "Failed to start server"
+        except Exception as e:
+            self.log.error(f"Failed to start server: {e}")
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Failed to start Lemonade Server: {e}")
+            status.error = str(e)
+
+        return status
+
+
2753
+ def _send_request(
2754
+ self,
2755
+ method: str,
2756
+ url: str,
2757
+ data: Optional[Dict[str, Any]] = None,
2758
+ timeout: int = DEFAULT_REQUEST_TIMEOUT,
2759
+ ) -> Dict[str, Any]:
2760
+ """
2761
+ Send a request to the server and return the response.
2762
+
2763
+ Args:
2764
+ method: HTTP method (get, post, etc.)
2765
+ url: URL to send the request to
2766
+ data: Request payload
2767
+ timeout: Request timeout in seconds
2768
+
2769
+ Returns:
2770
+ Response as a dict
2771
+
2772
+ Raises:
2773
+ LemonadeClientError: If the request fails
2774
+ """
2775
+ try:
2776
+ headers = {"Content-Type": "application/json"}
2777
+
2778
+ if method.lower() == "get":
2779
+ response = requests.get(url, headers=headers, timeout=timeout)
2780
+ elif method.lower() == "post":
2781
+ response = requests.post(
2782
+ url, json=data, headers=headers, timeout=timeout
2783
+ )
2784
+ else:
2785
+ raise LemonadeClientError(f"Unsupported HTTP method: {method}")
2786
+
2787
+ if response.status_code >= 400:
2788
+ raise LemonadeClientError(
2789
+ f"Request failed with status {response.status_code}: {response.text}"
2790
+ )
2791
+
2792
+ return response.json()
2793
+
2794
+ except requests.exceptions.RequestException as e:
2795
+ raise LemonadeClientError(f"Request failed: {str(e)}")
2796
+ except json.JSONDecodeError:
2797
+ raise LemonadeClientError(
2798
+ f"Failed to parse response as JSON: {response.text}"
2799
+ )
2800
+
2801
+
2802
+def create_lemonade_client(
+    model: Optional[str] = None,
+    host: Optional[str] = None,
+    port: Optional[int] = None,
+    auto_start: bool = False,
+    auto_load: bool = False,
+    auto_pull: bool = True,
+    verbose: bool = True,
+    background: str = "terminal",
+    keep_alive: bool = False,
+) -> LemonadeClient:
+    """
+    Factory function to create and configure a LemonadeClient instance.
+
+    This function provides a simplified way to create a LemonadeClient instance
+    with proper configuration from environment variables and/or explicit parameters.
+
+    Args:
+        model: Name of the model to use
+            (defaults to env var LEMONADE_MODEL or DEFAULT_MODEL_NAME)
+        host: Host address for the Lemonade server
+            (defaults to env var LEMONADE_HOST or DEFAULT_HOST)
+        port: Port number for the Lemonade server
+            (defaults to env var LEMONADE_PORT or DEFAULT_PORT)
+        auto_start: Automatically start the server
+        auto_load: Automatically load the model
+        auto_pull: Whether to automatically pull the model if it's not available
+            (when auto_load=True)
+        verbose: Whether to enable verbose logging
+        background: How to run the server if auto_start is True:
+            - "terminal": Launch in a new terminal window (default)
+            - "silent": Run in background with output to log file
+            - "none": Run in foreground
+        keep_alive: If True, don't terminate server when client is deleted
+
+    Returns:
+        A configured LemonadeClient instance
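+
+    Example (illustrative):
+        client = create_lemonade_client(auto_start=True, auto_load=True)
+        print(client.health_check())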
+    """
+    # Get configuration from environment variables with fallbacks to defaults
+    env_model = os.environ.get("LEMONADE_MODEL")
+    env_host = os.environ.get("LEMONADE_HOST")
+    env_port = os.environ.get("LEMONADE_PORT")
+
+    # Prioritize explicit parameters over environment variables over defaults
+    model_name = model or env_model or DEFAULT_MODEL_NAME
+    server_host = host or env_host or DEFAULT_HOST
+    server_port = port or (int(env_port) if env_port else DEFAULT_PORT)
+
+    # Create the client
+    client = LemonadeClient(
+        model=model_name,
+        host=server_host,
+        port=server_port,
+        verbose=verbose,
+        keep_alive=keep_alive,
+    )
+
+    # Auto-start server if requested
+    if auto_start:
+        try:
+            # Check if server is already running
+            try:
+                client.health_check()
+                client.log.info("Lemonade server is already running")
+            except LemonadeClientError:
+                # Server not running, start it
+                client.log.info(
+                    f"Starting Lemonade server at {server_host}:{server_port}"
+                )
+                client.launch_server(background=background)
+
+                # Perform a health check to verify the server is running
+                client.health_check()
+        except Exception as e:
+            client.log.error(f"Failed to start Lemonade server: {str(e)}")
+            raise LemonadeClientError(f"Failed to start Lemonade server: {str(e)}")
+
+    # Auto-load model if requested
+    if auto_load:
+        try:
+            # Check if auto_pull is enabled and model needs to be pulled first
+            if auto_pull:
+                # Check if model is available
+                models_response = client.list_models()
+                available_models = [
+                    model.get("id", "") for model in models_response.get("data", [])
+                ]
+
+                if model_name not in available_models:
+                    client.log.info(
+                        f"Model '{model_name}' not found in registry. "
+                        f"Available models: {available_models}"
+                    )
+                    client.log.info(
+                        f"Attempting to pull model '{model_name}' before loading..."
+                    )
+
+                    try:
+                        # Try to pull the model first
+                        pull_result = client.pull_model(
+                            model_name, timeout=300
+                        )  # 5 min timeout for download
+                        client.log.info(f"Successfully pulled model: {pull_result}")
+                    except Exception as pull_error:
+                        client.log.warning(
+                            f"Failed to pull model '{model_name}': {pull_error}"
+                        )
+                        client.log.info(
+                            "Proceeding with load anyway - server may auto-install"
+                        )
+                else:
+                    client.log.info(
+                        f"Model '{model_name}' found in registry, proceeding with load"
+                    )
+
+            # Now attempt to load the model
+            client.load_model(model_name, timeout=60)
+        except Exception as e:
+            # Extract detailed error information
+            error_details = str(e)
+            client.log.error(f"Failed to load {model_name}: {error_details}")
+
+            # Try to get more details about available models for debugging
+            try:
+                models_response = client.list_models()
+                available_models = [
+                    model.get("id", "unknown")
+                    for model in models_response.get("data", [])
+                ]
+                client.log.error(f"Available models: {available_models}")
+                client.log.error(f"Attempted to load: {model_name}")
+                if available_models:
+                    client.log.error(
+                        "Consider using one of the available models instead"
+                    )
+            except Exception as list_error:
+                client.log.error(f"Could not list available models: {list_error}")
+
+            # Include both original error and context in the raised exception
+            enhanced_message = f"Failed to load {model_name}: {error_details}"
+            if "available_models" in locals() and available_models:
+                enhanced_message += f" (Available models: {available_models})"
+
+            raise LemonadeClientError(enhanced_message)
+
+    return client
+
+
+def initialize_lemonade(
+    agent: str = "mcp",
+    ctx_size: Optional[int] = None,
+    auto_start: bool = True,
+    timeout: int = 120,
+    verbose: bool = False,
+    quiet: bool = False,
+    host: str = DEFAULT_HOST,
+    port: int = DEFAULT_PORT,
+) -> LemonadeStatus:
+    """
+    Convenience function to initialize Lemonade Server.
+
+    This is a simplified interface for initializing Lemonade with agent-specific
+    profiles. It creates a temporary client and runs initialization.
+
+    Args:
+        agent: Agent name (chat, code, rag, talk, blender, jira, docker, vlm, minimal, mcp)
+        ctx_size: Override context size
+        auto_start: Automatically start server if not running
+        timeout: Timeout for server startup
+        verbose: Enable verbose output
+        quiet: Suppress output
+        host: Lemonade server host
+        port: Lemonade server port
+
+    Returns:
+        LemonadeStatus with server status
+
+    Example:
+        from gaia.llm.lemonade_client import initialize_lemonade
+
+        # Initialize for chat agent
+        status = initialize_lemonade(agent="chat")
+
+        # Initialize for code agent with larger context
+        status = initialize_lemonade(agent="code", ctx_size=65536)
+    """
+    client = LemonadeClient(host=host, port=port, keep_alive=True)
+    return client.initialize(
+        agent=agent,
+        ctx_size=ctx_size,
+        auto_start=auto_start,
+        timeout=timeout,
+        verbose=verbose,
+        quiet=quiet,
+    )
+
+
+def print_agent_profiles():
+    """Print all available agent profiles and their requirements."""
+    print("\n📋 Available Agent Profiles:\n")
+    print(f"{'Agent':<12} {'Display Name':<20} {'Context Size':<15} {'Models'}")
+    print("-" * 80)
+
+    for name, profile in AGENT_PROFILES.items():
+        models = ", ".join(profile.models) if profile.models else "None"
+        print(
+            f"{name:<12} {profile.display_name:<20} {profile.min_ctx_size:<15} {models}"
+        )
+
+    print("\n📦 Available Models:\n")
+    print(f"{'Key':<20} {'Model ID':<40} {'Type'}")
+    print("-" * 80)
+
+    for key, model in MODELS.items():
+        print(f"{key:<20} {model.model_id:<40} {model.model_type.value}")
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    # Show agent profiles
+    print_agent_profiles()
+    print("\n" + "=" * 80 + "\n")
+
+    # Use the new factory function instead of direct instantiation
+    client = create_lemonade_client(
+        model=DEFAULT_MODEL_NAME,
+        auto_start=True,
+        auto_load=True,
+        verbose=True,
+    )
+
+    try:
+        # Check server health
+        try:
+            health = client.health_check()
+            print(f"Server health: {health}")
+        except Exception as e:
+            print(f"Health check failed: {e}")
+
+        # List available models
+        try:
+            print("\nListing available models:")
+            models_list = client.list_models()
+            print(json.dumps(models_list, indent=2))
+        except Exception as e:
+            print(f"Failed to list models: {e}")
+
+        # Example: Using chat completions
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "What is the capital of France?"},
+        ]
+
+        try:
+            print("\nNon-streaming response:")
+            response = client.chat_completions(
+                model=DEFAULT_MODEL_NAME, messages=messages, timeout=30
+            )
+            print(response["choices"][0]["message"]["content"])
+        except Exception as e:
+            print(f"Chat completion failed: {e}")
+
+        try:
+            print("\nStreaming response:")
+            for chunk in client.chat_completions(
+                model=DEFAULT_MODEL_NAME, messages=messages, stream=True, timeout=30
+            ):
+                if "choices" in chunk and chunk["choices"][0].get("delta", {}).get(
+                    "content"
+                ):
+                    print(chunk["choices"][0]["delta"]["content"], end="", flush=True)
+        except Exception as e:
+            print(f"Streaming chat completion failed: {e}")
+
+        print("\n\nDone!")
+
+    except Exception as e:
+        print(f"Error occurred: {e}")
+    finally:
+        # Make sure to terminate the server when done
+        client.terminate_server()