amd_gaia-0.14.1-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- amd_gaia-0.14.1.dist-info/METADATA +768 -0
- amd_gaia-0.14.1.dist-info/RECORD +800 -0
- amd_gaia-0.14.1.dist-info/WHEEL +5 -0
- amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
- amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
- amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
- gaia/__init__.py +2 -0
- gaia/agents/__init__.py +19 -0
- gaia/agents/base/__init__.py +9 -0
- gaia/agents/base/agent.py +2072 -0
- gaia/agents/base/api_agent.py +120 -0
- gaia/agents/base/console.py +1457 -0
- gaia/agents/base/mcp_agent.py +86 -0
- gaia/agents/base/tools.py +83 -0
- gaia/agents/blender/agent.py +556 -0
- gaia/agents/blender/agent_simple.py +135 -0
- gaia/agents/blender/app.py +211 -0
- gaia/agents/blender/app_simple.py +41 -0
- gaia/agents/blender/core/__init__.py +16 -0
- gaia/agents/blender/core/materials.py +506 -0
- gaia/agents/blender/core/objects.py +316 -0
- gaia/agents/blender/core/rendering.py +225 -0
- gaia/agents/blender/core/scene.py +220 -0
- gaia/agents/blender/core/view.py +146 -0
- gaia/agents/chat/__init__.py +9 -0
- gaia/agents/chat/agent.py +975 -0
- gaia/agents/chat/app.py +1058 -0
- gaia/agents/chat/session.py +508 -0
- gaia/agents/chat/tools/__init__.py +15 -0
- gaia/agents/chat/tools/file_tools.py +96 -0
- gaia/agents/chat/tools/rag_tools.py +1729 -0
- gaia/agents/chat/tools/shell_tools.py +436 -0
- gaia/agents/code/__init__.py +7 -0
- gaia/agents/code/agent.py +547 -0
- gaia/agents/code/app.py +266 -0
- gaia/agents/code/models.py +135 -0
- gaia/agents/code/orchestration/__init__.py +24 -0
- gaia/agents/code/orchestration/checklist_executor.py +1739 -0
- gaia/agents/code/orchestration/checklist_generator.py +709 -0
- gaia/agents/code/orchestration/factories/__init__.py +9 -0
- gaia/agents/code/orchestration/factories/base.py +63 -0
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
- gaia/agents/code/orchestration/factories/python_factory.py +106 -0
- gaia/agents/code/orchestration/orchestrator.py +610 -0
- gaia/agents/code/orchestration/project_analyzer.py +391 -0
- gaia/agents/code/orchestration/steps/__init__.py +67 -0
- gaia/agents/code/orchestration/steps/base.py +188 -0
- gaia/agents/code/orchestration/steps/error_handler.py +314 -0
- gaia/agents/code/orchestration/steps/nextjs.py +828 -0
- gaia/agents/code/orchestration/steps/python.py +307 -0
- gaia/agents/code/orchestration/template_catalog.py +463 -0
- gaia/agents/code/orchestration/workflows/__init__.py +14 -0
- gaia/agents/code/orchestration/workflows/base.py +80 -0
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
- gaia/agents/code/orchestration/workflows/python.py +94 -0
- gaia/agents/code/prompts/__init__.py +11 -0
- gaia/agents/code/prompts/base_prompt.py +77 -0
- gaia/agents/code/prompts/code_patterns.py +1925 -0
- gaia/agents/code/prompts/nextjs_prompt.py +40 -0
- gaia/agents/code/prompts/python_prompt.py +109 -0
- gaia/agents/code/schema_inference.py +365 -0
- gaia/agents/code/system_prompt.py +41 -0
- gaia/agents/code/tools/__init__.py +42 -0
- gaia/agents/code/tools/cli_tools.py +1138 -0
- gaia/agents/code/tools/code_formatting.py +319 -0
- gaia/agents/code/tools/code_tools.py +769 -0
- gaia/agents/code/tools/error_fixing.py +1347 -0
- gaia/agents/code/tools/external_tools.py +180 -0
- gaia/agents/code/tools/file_io.py +845 -0
- gaia/agents/code/tools/prisma_tools.py +190 -0
- gaia/agents/code/tools/project_management.py +1016 -0
- gaia/agents/code/tools/testing.py +321 -0
- gaia/agents/code/tools/typescript_tools.py +122 -0
- gaia/agents/code/tools/validation_parsing.py +461 -0
- gaia/agents/code/tools/validation_tools.py +803 -0
- gaia/agents/code/tools/web_dev_tools.py +1744 -0
- gaia/agents/code/validators/__init__.py +16 -0
- gaia/agents/code/validators/antipattern_checker.py +241 -0
- gaia/agents/code/validators/ast_analyzer.py +197 -0
- gaia/agents/code/validators/requirements_validator.py +145 -0
- gaia/agents/code/validators/syntax_validator.py +171 -0
- gaia/agents/docker/__init__.py +7 -0
- gaia/agents/docker/agent.py +642 -0
- gaia/agents/jira/__init__.py +11 -0
- gaia/agents/jira/agent.py +894 -0
- gaia/agents/jira/jql_templates.py +299 -0
- gaia/agents/routing/__init__.py +7 -0
- gaia/agents/routing/agent.py +512 -0
- gaia/agents/routing/system_prompt.py +75 -0
- gaia/api/__init__.py +23 -0
- gaia/api/agent_registry.py +238 -0
- gaia/api/app.py +305 -0
- gaia/api/openai_server.py +575 -0
- gaia/api/schemas.py +186 -0
- gaia/api/sse_handler.py +370 -0
- gaia/apps/__init__.py +4 -0
- gaia/apps/llm/__init__.py +6 -0
- gaia/apps/llm/app.py +169 -0
- gaia/apps/summarize/app.py +633 -0
- gaia/apps/summarize/html_viewer.py +133 -0
- gaia/apps/summarize/pdf_formatter.py +284 -0
- gaia/audio/__init__.py +2 -0
- gaia/audio/audio_client.py +439 -0
- gaia/audio/audio_recorder.py +269 -0
- gaia/audio/kokoro_tts.py +599 -0
- gaia/audio/whisper_asr.py +432 -0
- gaia/chat/__init__.py +16 -0
- gaia/chat/app.py +430 -0
- gaia/chat/prompts.py +522 -0
- gaia/chat/sdk.py +1200 -0
- gaia/cli.py +5621 -0
- gaia/eval/batch_experiment.py +2332 -0
- gaia/eval/claude.py +542 -0
- gaia/eval/config.py +37 -0
- gaia/eval/email_generator.py +512 -0
- gaia/eval/eval.py +3179 -0
- gaia/eval/groundtruth.py +1130 -0
- gaia/eval/transcript_generator.py +582 -0
- gaia/eval/webapp/README.md +168 -0
- gaia/eval/webapp/node_modules/.bin/mime +16 -0
- gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
- gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
- gaia/eval/webapp/node_modules/.package-lock.json +865 -0
- gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
- gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
- gaia/eval/webapp/node_modules/accepts/README.md +140 -0
- gaia/eval/webapp/node_modules/accepts/index.js +238 -0
- gaia/eval/webapp/node_modules/accepts/package.json +47 -0
- gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
- gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
- gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
- gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
- gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
- gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
- gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
- gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
- gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
- gaia/eval/webapp/node_modules/bytes/History.md +97 -0
- gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
- gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
- gaia/eval/webapp/node_modules/bytes/index.js +170 -0
- gaia/eval/webapp/node_modules/bytes/package.json +42 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
- gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
- gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
- gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
- gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
- gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
- gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
- gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
- gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
- gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
- gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-type/README.md +94 -0
- gaia/eval/webapp/node_modules/content-type/index.js +225 -0
- gaia/eval/webapp/node_modules/content-type/package.json +42 -0
- gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
- gaia/eval/webapp/node_modules/cookie/README.md +317 -0
- gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/cookie/index.js +334 -0
- gaia/eval/webapp/node_modules/cookie/package.json +44 -0
- gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
- gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
- gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
- gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
- gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
- gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
- gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
- gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
- gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
- gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
- gaia/eval/webapp/node_modules/debug/Makefile +50 -0
- gaia/eval/webapp/node_modules/debug/README.md +312 -0
- gaia/eval/webapp/node_modules/debug/component.json +19 -0
- gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
- gaia/eval/webapp/node_modules/debug/node.js +1 -0
- gaia/eval/webapp/node_modules/debug/package.json +49 -0
- gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
- gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
- gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
- gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
- gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
- gaia/eval/webapp/node_modules/depd/History.md +103 -0
- gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
- gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
- gaia/eval/webapp/node_modules/depd/index.js +538 -0
- gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
- gaia/eval/webapp/node_modules/depd/package.json +45 -0
- gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
- gaia/eval/webapp/node_modules/destroy/README.md +63 -0
- gaia/eval/webapp/node_modules/destroy/index.js +209 -0
- gaia/eval/webapp/node_modules/destroy/package.json +48 -0
- gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
- gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
- gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
- gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
- gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
- gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
- gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
- gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
- gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
- gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
- gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
- gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
- gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
- gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
- gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
- gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
- gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
- gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
- gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
- gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
- gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
- gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
- gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
- gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
- gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
- gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
- gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
- gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
- gaia/eval/webapp/node_modules/etag/README.md +159 -0
- gaia/eval/webapp/node_modules/etag/index.js +131 -0
- gaia/eval/webapp/node_modules/etag/package.json +47 -0
- gaia/eval/webapp/node_modules/express/History.md +3656 -0
- gaia/eval/webapp/node_modules/express/LICENSE +24 -0
- gaia/eval/webapp/node_modules/express/Readme.md +260 -0
- gaia/eval/webapp/node_modules/express/index.js +11 -0
- gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
- gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
- gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
- gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
- gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
- gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
- gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
- gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
- gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
- gaia/eval/webapp/node_modules/express/package.json +102 -0
- gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
- gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
- gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
- gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
- gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
- gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
- gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
- gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
- gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
- gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
- gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
- gaia/eval/webapp/node_modules/fresh/README.md +119 -0
- gaia/eval/webapp/node_modules/fresh/index.js +137 -0
- gaia/eval/webapp/node_modules/fresh/package.json +46 -0
- gaia/eval/webapp/node_modules/fs/README.md +9 -0
- gaia/eval/webapp/node_modules/fs/package.json +20 -0
- gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
- gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
- gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
- gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
- gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
- gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
- gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
- gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
- gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
- gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
- gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
- gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
- gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
- gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
- gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
- gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
- gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
- gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
- gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
- gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
- gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
- gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
- gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
- gaia/eval/webapp/node_modules/gopd/README.md +40 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
- gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/gopd/index.js +15 -0
- gaia/eval/webapp/node_modules/gopd/package.json +77 -0
- gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
- gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
- gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
- gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
- gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
- gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
- gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
- gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
- gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
- gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
- gaia/eval/webapp/node_modules/hasown/README.md +40 -0
- gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/hasown/index.js +8 -0
- gaia/eval/webapp/node_modules/hasown/package.json +92 -0
- gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
- gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
- gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
- gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
- gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
- gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
- gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
- gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
- gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
- gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
- gaia/eval/webapp/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
- gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
- gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
- gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
- gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
- gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
- gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
- gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
- gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
- gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
- gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
- gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
- gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
- gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
- gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
- gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
- gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
- gaia/eval/webapp/node_modules/methods/README.md +51 -0
- gaia/eval/webapp/node_modules/methods/index.js +69 -0
- gaia/eval/webapp/node_modules/methods/package.json +36 -0
- gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
- gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
- gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
- gaia/eval/webapp/node_modules/mime/README.md +90 -0
- gaia/eval/webapp/node_modules/mime/cli.js +8 -0
- gaia/eval/webapp/node_modules/mime/mime.js +108 -0
- gaia/eval/webapp/node_modules/mime/package.json +44 -0
- gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
- gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
- gaia/eval/webapp/node_modules/mime/types.json +1 -0
- gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
- gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
- gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
- gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
- gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
- gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
- gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
- gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
- gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
- gaia/eval/webapp/node_modules/ms/index.js +152 -0
- gaia/eval/webapp/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/ms/package.json +37 -0
- gaia/eval/webapp/node_modules/ms/readme.md +51 -0
- gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
- gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
- gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
- gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
- gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
- gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
- gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
- gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
- gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
- gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
- gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
- gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
- gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
- gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
- gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
- gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
- gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
- gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
- gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
- gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
- gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
- gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
- gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
- gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
- gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
- gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
- gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
- gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
- gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
- gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
- gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
- gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
- gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
- gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
- gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
- gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
- gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
- gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
- gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
- gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
- gaia/eval/webapp/node_modules/path/.npmignore +1 -0
- gaia/eval/webapp/node_modules/path/LICENSE +18 -0
- gaia/eval/webapp/node_modules/path/README.md +15 -0
- gaia/eval/webapp/node_modules/path/package.json +24 -0
- gaia/eval/webapp/node_modules/path/path.js +628 -0
- gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
- gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
- gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
- gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
- gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/process/LICENSE +22 -0
- gaia/eval/webapp/node_modules/process/README.md +26 -0
- gaia/eval/webapp/node_modules/process/browser.js +184 -0
- gaia/eval/webapp/node_modules/process/index.js +2 -0
- gaia/eval/webapp/node_modules/process/package.json +27 -0
- gaia/eval/webapp/node_modules/process/test.js +199 -0
- gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
- gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
- gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
- gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
- gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
- gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
- gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
- gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
- gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
- gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
- gaia/eval/webapp/node_modules/qs/README.md +709 -0
- gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
- gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
- gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
- gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
- gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
- gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
- gaia/eval/webapp/node_modules/qs/package.json +91 -0
- gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
- gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
- gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
- gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
- gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
- gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
- gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
- gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
- gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
- gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
- gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
- gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
- gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
- gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
- gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
- gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
- gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
- gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
- gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
- gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
- gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
- gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
- gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
- gaia/eval/webapp/node_modules/send/LICENSE +23 -0
- gaia/eval/webapp/node_modules/send/README.md +327 -0
- gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/send/index.js +1142 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
- gaia/eval/webapp/node_modules/send/package.json +62 -0
- gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
- gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
- gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
- gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
- gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
- gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
- gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
- gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
- gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
- gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
- gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
- gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
- gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
- gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
- gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
- gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
- gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
- gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
- gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
- gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
- gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
- gaia/eval/webapp/node_modules/statuses/README.md +136 -0
- gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
- gaia/eval/webapp/node_modules/statuses/index.js +146 -0
- gaia/eval/webapp/node_modules/statuses/package.json +49 -0
- gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
- gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
- gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
- gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
- gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
- gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
- gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
- gaia/eval/webapp/node_modules/type-is/README.md +170 -0
- gaia/eval/webapp/node_modules/type-is/index.js +266 -0
- gaia/eval/webapp/node_modules/type-is/package.json +45 -0
- gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
- gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
- gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
- gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
- gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
- gaia/eval/webapp/node_modules/util/LICENSE +18 -0
- gaia/eval/webapp/node_modules/util/README.md +15 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/util/package.json +35 -0
- gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
- gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
- gaia/eval/webapp/node_modules/util/util.js +586 -0
- gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
- gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
- gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
- gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
- gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
- gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
- gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
- gaia/eval/webapp/node_modules/vary/README.md +101 -0
- gaia/eval/webapp/node_modules/vary/index.js +149 -0
- gaia/eval/webapp/node_modules/vary/package.json +43 -0
- gaia/eval/webapp/package-lock.json +875 -0
- gaia/eval/webapp/package.json +21 -0
- gaia/eval/webapp/public/app.js +3403 -0
- gaia/eval/webapp/public/index.html +88 -0
- gaia/eval/webapp/public/styles.css +3661 -0
- gaia/eval/webapp/server.js +416 -0
- gaia/eval/webapp/test-setup.js +73 -0
- gaia/llm/__init__.py +2 -0
- gaia/llm/lemonade_client.py +3083 -0
- gaia/llm/lemonade_manager.py +269 -0
- gaia/llm/llm_client.py +729 -0
- gaia/llm/vlm_client.py +307 -0
- gaia/logger.py +189 -0
- gaia/mcp/agent_mcp_server.py +245 -0
- gaia/mcp/blender_mcp_client.py +138 -0
- gaia/mcp/blender_mcp_server.py +648 -0
- gaia/mcp/context7_cache.py +332 -0
- gaia/mcp/external_services.py +518 -0
- gaia/mcp/mcp_bridge.py +550 -0
- gaia/mcp/servers/__init__.py +6 -0
- gaia/mcp/servers/docker_mcp.py +83 -0
- gaia/rag/__init__.py +10 -0
- gaia/rag/app.py +293 -0
- gaia/rag/demo.py +304 -0
- gaia/rag/pdf_utils.py +235 -0
- gaia/rag/sdk.py +2194 -0
- gaia/security.py +163 -0
- gaia/talk/app.py +289 -0
- gaia/talk/sdk.py +538 -0
- gaia/util.py +46 -0
- gaia/version.py +100 -0
gaia/llm/lemonade_client.py
@@ -0,0 +1,3083 @@
#!/usr/bin/env python
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
"""
Lemonade Server Client for GAIA.

This module provides a client for interacting with the Lemonade server's
OpenAI-compatible API and additional functionality.
"""

import json
import logging
import os
import re
import shutil
import signal
import socket
import subprocess
import sys
import threading
import time
from dataclasses import dataclass, field
from enum import Enum
from threading import Event, Thread
from typing import Any, Callable, Dict, Generator, List, Optional, Union

import openai  # For exception types
import psutil
import requests
from dotenv import load_dotenv

# Import OpenAI client for internal use
from openai import OpenAI

from gaia.logger import get_logger

# Load environment variables from .env file
load_dotenv()

# =========================================================================
# Server Configuration Defaults
# =========================================================================
# Default server host and port (can be overridden via LEMONADE_BASE_URL env var)
DEFAULT_HOST = "localhost"
DEFAULT_PORT = 8000
# API version supported by this client
LEMONADE_API_VERSION = "v1"
# Default URL includes /api/v1 to match documentation and other clients
DEFAULT_LEMONADE_URL = (
    f"http://{DEFAULT_HOST}:{DEFAULT_PORT}/api/{LEMONADE_API_VERSION}"
)


def _get_lemonade_config() -> tuple:
    """
    Get Lemonade host, port, and base_url from environment or defaults.

    Parses LEMONADE_BASE_URL env var if set, otherwise uses defaults.
    The base_url is expected to include /api/v1 suffix per documentation.

    Returns:
        Tuple of (host, port, base_url)
    """
    from urllib.parse import urlparse

    base_url = os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
    # Parse the URL to extract host and port for backwards compatibility
    parsed = urlparse(base_url)
    host = parsed.hostname or DEFAULT_HOST
    port = (
        80
        if (parsed.port is None and host is not None)
        else (parsed.port or DEFAULT_PORT)
    )
    return (host, port, base_url)
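
A quick illustration of how _get_lemonade_config() resolves the LEMONADE_BASE_URL override. This is a minimal sketch against the definitions above (assuming no .env file sets the variable), not code from the package. Note the edge case: a URL without an explicit port resolves to 80, the implied HTTP port, rather than DEFAULT_PORT.

import os
from gaia.llm.lemonade_client import _get_lemonade_config

# No override: the default URL carries an explicit :8000, so the else-branch returns it
os.environ.pop("LEMONADE_BASE_URL", None)
print(_get_lemonade_config())
# -> ("localhost", 8000, "http://localhost:8000/api/v1")

# Override with an explicit port: host and port are parsed back out of the URL
os.environ["LEMONADE_BASE_URL"] = "http://10.0.0.5:9000/api/v1"
print(_get_lemonade_config())
# -> ("10.0.0.5", 9000, "http://10.0.0.5:9000/api/v1")

# No port in the override URL: falls back to 80, not DEFAULT_PORT
os.environ["LEMONADE_BASE_URL"] = "http://lemonade.internal/api/v1"
print(_get_lemonade_config())
# -> ("lemonade.internal", 80, "http://lemonade.internal/api/v1")
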
|
|
78
|
+
# =========================================================================
|
|
79
|
+
# Model Configuration Defaults
|
|
80
|
+
# =========================================================================
|
|
81
|
+
# Default model for text generation - lightweight CPU model for testing
|
|
82
|
+
DEFAULT_MODEL_NAME = "Qwen2.5-0.5B-Instruct-CPU"
|
|
83
|
+
# DEFAULT_MODEL_NAME = "Llama-3.2-3B-Instruct-Hybrid"
|
|
84
|
+
|
|
85
|
+
# =========================================================================
|
|
86
|
+
# Request Configuration Defaults
|
|
87
|
+
# =========================================================================
|
|
88
|
+
# Default timeout in seconds for regular API requests
|
|
89
|
+
# Increased to accommodate long-running coding and evaluation tasks
|
|
90
|
+
DEFAULT_REQUEST_TIMEOUT = 900
|
|
91
|
+
# Default timeout in seconds for model loading operations
|
|
92
|
+
# Increased for large model downloads and loading
|
|
93
|
+
DEFAULT_MODEL_LOAD_TIMEOUT = 1200
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# =========================================================================
|
|
97
|
+
# Model Types and Agent Profiles
|
|
98
|
+
# =========================================================================
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class ModelType(Enum):
|
|
102
|
+
"""Types of models supported by Lemonade"""
|
|
103
|
+
|
|
104
|
+
LLM = "llm" # Large Language Model for chat/reasoning
|
|
105
|
+
EMBEDDING = "embed" # Embedding model for RAG
|
|
106
|
+
VLM = "vlm" # Vision-Language Model for image understanding
|
|
107
|
+
ASR = "asr" # Automatic Speech Recognition
|
|
108
|
+
TTS = "tts" # Text-to-Speech
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
|
|
112
|
+
class ModelRequirement:
|
|
113
|
+
"""Defines a model requirement for an agent"""
|
|
114
|
+
|
|
115
|
+
model_type: ModelType
|
|
116
|
+
model_id: str
|
|
117
|
+
display_name: str
|
|
118
|
+
required: bool = True
|
|
119
|
+
min_ctx_size: int = 4096 # Minimum context size needed
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
|
|
123
|
+
class AgentProfile:
|
|
124
|
+
"""Defines the requirements for an agent"""
|
|
125
|
+
|
|
126
|
+
name: str
|
|
127
|
+
display_name: str
|
|
128
|
+
models: list = field(default_factory=list)
|
|
129
|
+
min_ctx_size: int = 4096
|
|
130
|
+
description: str = ""
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
|
|
134
|
+
class LemonadeStatus:
    """Status of Lemonade Server"""

    running: bool = False
    url: str = field(
        default_factory=lambda: os.getenv("LEMONADE_BASE_URL", DEFAULT_LEMONADE_URL)
    )
    context_size: int = 0
    loaded_models: list = field(default_factory=list)
    health_data: dict = field(default_factory=dict)
    error: Optional[str] = None


# Define available models
MODELS = {
    # LLM Models
    "qwen3-coder-30b": ModelRequirement(
        model_type=ModelType.LLM,
        model_id="Qwen3-Coder-30B-A3B-Instruct-GGUF",
        display_name="Qwen3 Coder 30B",
        min_ctx_size=32768,
    ),
    "qwen2.5-0.5b": ModelRequirement(
        model_type=ModelType.LLM,
        model_id="Qwen2.5-0.5B-Instruct-CPU",
        display_name="Qwen2.5 0.5B (Fast)",
        min_ctx_size=4096,
    ),
    # Embedding Models
    "nomic-embed": ModelRequirement(
        model_type=ModelType.EMBEDDING,
        model_id="nomic-embed-text-v2-moe-GGUF",
        display_name="Nomic Embed Text v2",
        min_ctx_size=2048,
    ),
    # VLM Models
    "qwen2.5-vl-7b": ModelRequirement(
        model_type=ModelType.VLM,
        model_id="Qwen2.5-VL-7B-Instruct-GGUF",
        display_name="Qwen2.5 VL 7B",
        min_ctx_size=8192,
    ),
}

# Define agent profiles with their model requirements
AGENT_PROFILES = {
    "chat": AgentProfile(
        name="chat",
        display_name="Chat Agent",
        models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
        min_ctx_size=32768,
        description="Interactive chat with RAG and vision support",
    ),
    "code": AgentProfile(
        name="code",
        display_name="Code Agent",
        models=["qwen3-coder-30b"],
        min_ctx_size=32768,
        description="Autonomous coding assistant",
    ),
    "talk": AgentProfile(
        name="talk",
        display_name="Talk Agent",
        models=["qwen3-coder-30b"],
        min_ctx_size=32768,
        description="Voice-enabled chat",
    ),
    "rag": AgentProfile(
        name="rag",
        display_name="RAG System",
        models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
        min_ctx_size=32768,
        description="Document Q&A with retrieval and vision",
    ),
    "blender": AgentProfile(
        name="blender",
        display_name="Blender Agent",
        models=["qwen3-coder-30b"],
        min_ctx_size=32768,
        description="3D content generation in Blender",
    ),
    "jira": AgentProfile(
        name="jira",
        display_name="Jira Agent",
        models=["qwen3-coder-30b"],
        min_ctx_size=32768,
        description="Jira issue management",
    ),
    "docker": AgentProfile(
        name="docker",
        display_name="Docker Agent",
        models=["qwen3-coder-30b"],
        min_ctx_size=32768,
        description="Docker container management",
    ),
    "vlm": AgentProfile(
        name="vlm",
        display_name="Vision Agent",
        models=["qwen2.5-vl-7b"],
        min_ctx_size=8192,
        description="Image understanding and analysis",
    ),
    "minimal": AgentProfile(
        name="minimal",
        display_name="Minimal (Fast)",
        models=["qwen2.5-0.5b"],
        min_ctx_size=4096,
        description="Fast responses with smaller model",
    ),
    "mcp": AgentProfile(
        name="mcp",
        display_name="MCP Bridge",
        models=["qwen3-coder-30b", "nomic-embed", "qwen2.5-vl-7b"],
        min_ctx_size=32768,
        description="Model Context Protocol bridge server with vision",
    ),
}
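
# A minimal sketch (illustrative only) of how a profile resolves to concrete
# server model IDs through the two tables above:
#
#     profile = AGENT_PROFILES["rag"]
#     model_ids = [MODELS[key].model_id for key in profile.models]
#     # -> ["Qwen3-Coder-30B-A3B-Instruct-GGUF",
#     #     "nomic-embed-text-v2-moe-GGUF",
#     #     "Qwen2.5-VL-7B-Instruct-GGUF"]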


class LemonadeClientError(Exception):
    """Base exception for Lemonade client errors."""


class ModelDownloadCancelledError(LemonadeClientError):
    """Raised when a model download is cancelled by the user."""


class InsufficientDiskSpaceError(LemonadeClientError):
    """Raised when there's not enough disk space for a model download."""


@dataclass
class DownloadTask:
    """Represents an ongoing model download."""

    model_name: str
    size_gb: float = 0.0
    start_time: float = field(default_factory=time.time)
    cancel_event: Event = field(default_factory=Event)
    progress_percent: float = 0.0

    def cancel(self):
        """Cancel this download."""
        self.cancel_event.set()

    def is_cancelled(self) -> bool:
        """Check if download was cancelled."""
        return self.cancel_event.is_set()

    def elapsed_time(self) -> float:
        """Get elapsed time in seconds."""
        return time.time() - self.start_time
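
# Illustrative sketch of the cooperative-cancellation pattern DownloadTask
# supports (the worker thread below is hypothetical):
#
#     task = DownloadTask(model_name="some-model", size_gb=5.0)
#     # ... a worker thread polls task.is_cancelled() between chunks ...
#     task.cancel()            # signals the threading.Event
#     task.is_cancelled()      # -> True
#     task.elapsed_time()      # seconds since the task was created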


def _supports_unicode() -> bool:
    """
    Check if the terminal supports Unicode output.

    Returns:
        True if UTF-8 encoding is supported, False otherwise
    """
    try:
        # Check stdout encoding
        encoding = sys.stdout.encoding
        if encoding and "utf" in encoding.lower():
            return True
        # Try encoding a test emoji
        "✓".encode(encoding or "utf-8")
        return True
    except (UnicodeEncodeError, AttributeError, LookupError):
        return False


# Cache unicode support check
_UNICODE_SUPPORTED = _supports_unicode()


def _emoji(unicode_char: str, ascii_fallback: str) -> str:
    """
    Return emoji if terminal supports unicode, otherwise ASCII fallback.

    Args:
        unicode_char: Unicode emoji character
        ascii_fallback: ASCII fallback string

    Returns:
        Unicode emoji or ASCII fallback

    Examples:
        _emoji("✅", "[OK]")  # Returns "✅" or "[OK]"
        _emoji("❌", "[X]")  # Returns "❌" or "[X]"
        _emoji("📥", "[DL]")  # Returns "📥" or "[DL]"
    """
    return unicode_char if _UNICODE_SUPPORTED else ascii_fallback


def kill_process_on_port(port):
    """Kill any process that is using the specified port."""
    for proc in psutil.process_iter(["pid", "name"]):
        try:
            connections = proc.net_connections()
            for conn in connections:
                if conn.laddr.port == port:
                    proc_name = proc.name()
                    proc_pid = proc.pid
                    proc.kill()
                    print(
                        f"Killed process {proc_name} (PID: {proc_pid}) using port {port}"
                    )
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            continue
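
# A quick way to confirm the port is actually free afterwards (illustrative
# sketch using only the standard library; the port number is a placeholder):
#
#     import socket
#     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
#         in_use = s.connect_ex(("localhost", 8000)) == 0
#
# connect_ex() returns 0 when something is still listening on the port.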


def _prompt_user_for_download(
    model_name: str, size_gb: float, estimated_minutes: int
) -> bool:
    """
    Prompt user for confirmation before downloading a large model.

    Args:
        model_name: Name of the model to download
        size_gb: Size in gigabytes
        estimated_minutes: Estimated download time in minutes

    Returns:
        True if user confirms, False otherwise
    """
    # Check if we're in an interactive terminal
    if not sys.stdin.isatty() or not sys.stdout.isatty():
        # Non-interactive environment - auto-approve
        return True

    print("\n" + "=" * 60)
    print(f"{_emoji('📥', '[DOWNLOAD]')} Model Download Required")
    print("=" * 60)
    print(f"Model: {model_name}")
    print(f"Size: {size_gb:.1f} GB")
    print(f"Estimated time: ~{estimated_minutes} minutes (@ 100Mbps)")
    print("=" * 60)

    while True:
        response = input("Download this model? [Y/n]: ").strip().lower()
        if response in ("", "y", "yes"):
            return True
        elif response in ("n", "no"):
            return False
        else:
            print("Please enter 'y' or 'n'")


def _check_disk_space(size_gb: float, path: Optional[str] = None) -> bool:
    """
    Check if there's enough disk space for download.

    Args:
        size_gb: Required space in GB
        path: Path to check. If None (default), checks current working directory.
            This is cross-platform compatible (works on Windows and Unix).

    Returns:
        True if enough space available

    Raises:
        InsufficientDiskSpaceError: If not enough space

    Note:
        The default checks the current working directory's drive/partition.
        Ideally, this should check the actual model storage location, but that
        requires server API support to report the storage path.
    """
    try:
        # Use current working directory if no path specified (cross-platform)
        check_path = path if path is not None else os.getcwd()
        stat = shutil.disk_usage(check_path)
        free_gb = stat.free / (1024**3)
        required_gb = size_gb * 1.5  # Need 50% buffer for extraction/temp files

        if free_gb < required_gb:
            raise InsufficientDiskSpaceError(
                f"Insufficient disk space: need {required_gb:.1f}GB, "
                f"have {free_gb:.1f}GB free"
            )
        return True
    except InsufficientDiskSpaceError:
        raise
    except Exception as e:
        # If we can't check disk space, log warning but continue
        logger = logging.getLogger(__name__)
        logger.warning(f"Could not check disk space: {e}")
        return True
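
# Worked example of the 1.5x buffer (numbers are illustrative): a 20.0 GB
# model requires 30.0 GB free, so a disk with only 25 GB free raises
# InsufficientDiskSpaceError even though the raw download itself would fit.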


class LemonadeClient:
    """Client for interacting with the Lemonade server REST API."""

    def __init__(
        self,
        model: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        verbose: bool = True,
        keep_alive: bool = False,
    ):
        """
        Initialize the Lemonade client.

        Args:
            model: Name of the model to load (optional)
            host: Host address of the Lemonade server (defaults to LEMONADE_BASE_URL env var)
            port: Port number of the Lemonade server (defaults to LEMONADE_BASE_URL env var)
            verbose: If False, reduce logging verbosity during initialization
            keep_alive: If True, don't terminate server in __del__
        """
        # Use provided host/port, or get from env var, or use defaults
        env_host, env_port, env_base_url = _get_lemonade_config()
        self.host = host if host is not None else env_host
        self.port = port if port is not None else env_port
        # If host/port explicitly provided, construct URL; otherwise use env URL directly
        if host is not None or port is not None:
            self.base_url = f"http://{self.host}:{self.port}/api/{LEMONADE_API_VERSION}"
        else:
            self.base_url = env_base_url
        self.model = model
        self.server_process = None
        self.log = get_logger(__name__)
        self.keep_alive = keep_alive

        # Track active downloads for cancellation support
        self.active_downloads: Dict[str, DownloadTask] = {}
        self._downloads_lock = threading.Lock()

        # Set logging level based on verbosity
        if not verbose:
            self.log.setLevel(logging.WARNING)

        self.log.info(f"Initialized Lemonade client for {host}:{port}")
        if model:
            self.log.info(f"Initial model set to: {model}")

    def launch_server(self, log_level="info", background="none", ctx_size=None):
        """
        Launch the Lemonade server using subprocess.

        Args:
            log_level: Logging level for the server
                ('critical', 'error', 'warning', 'info', 'debug', 'trace').
                Defaults to 'info'.
            background: How to run the server:
                - "terminal": Launch in a new terminal window
                - "silent": Run in background with output to log file
                - "none": Run in foreground (default)
            ctx_size: Context size for the model (default: None, uses server default).
                For chat/RAG applications, use 32768 or higher.

        This method follows the approach in test_lemonade_server.py.
        """
        self.log.info("Starting Lemonade server...")

        # Ensure we kill anything using the port
        kill_process_on_port(self.port)

        # Build the base command
        base_cmd = ["lemonade-server", "serve"]
        if log_level != "info":
            base_cmd.extend(["--log-level", log_level])
        if ctx_size is not None:
            base_cmd.extend(["--ctx-size", str(ctx_size)])
            self.log.info(f"Context size set to: {ctx_size}")

        if background == "terminal":
            # Launch in a new terminal window
            cmd = f'start cmd /k "{" ".join(base_cmd)}"'
            self.server_process = subprocess.Popen(cmd, shell=True)
        elif background == "silent":
            # Run in background with subprocess
            log_file = open("lemonade.log", "w", encoding="utf-8")
            self.server_process = subprocess.Popen(
                base_cmd,
                stdout=log_file,
                stderr=log_file,
                text=True,
                bufsize=1,
                shell=True,
            )
        else:  # "none" or any other value
            # Run in foreground with real-time output
            self.server_process = subprocess.Popen(
                base_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                bufsize=1,
                shell=True,
            )

        # Print stdout and stderr in real-time only for foreground mode
        def print_output():
            while True:
                if self.server_process is None:
                    break
                try:
                    stdout = self.server_process.stdout.readline()
                    stderr = self.server_process.stderr.readline()
                    if stdout:
                        self.log.debug(f"[Server stdout] {stdout.strip()}")
                    if stderr:
                        self.log.warning(f"[Server stderr] {stderr.strip()}")
                    if (
                        not stdout
                        and not stderr
                        and self.server_process is not None
                        and self.server_process.poll() is not None
                    ):
                        break
                except AttributeError:
                    # This happens if server_process becomes None
                    # while we're executing this function
                    break

        output_thread = Thread(target=print_output, daemon=True)
        output_thread.start()

        # Wait for the server to start by checking the port
        start_time = time.time()
        while True:
            if time.time() - start_time > 60:
                self.log.error("Server failed to start within 60 seconds")
                raise TimeoutError("Server failed to start within 60 seconds")
            try:
                conn = socket.create_connection((self.host, self.port))
                conn.close()
                break
            except socket.error:
                time.sleep(1)

        # Wait a few more seconds after the port is available
        time.sleep(5)
        self.log.info("Lemonade server started successfully")

    def terminate_server(self):
        """Terminate the Lemonade server process if it exists."""
        if not self.server_process:
            return

        try:
            self.log.info("Terminating Lemonade server...")

            # Handle different process types
            if hasattr(self.server_process, "join"):
                # Handle multiprocessing.Process objects
                self.server_process.terminate()
                self.server_process.join(timeout=5)
            else:
                # For subprocess.Popen
                if sys.platform.startswith("win") and self.server_process.pid:
                    # On Windows, use taskkill to ensure process tree is terminated
                    os.system(f"taskkill /F /PID {self.server_process.pid} /T")
                elif self.server_process.pid:
                    # On Linux/Unix, kill the process group to terminate child processes
                    try:
                        os.killpg(os.getpgid(self.server_process.pid), signal.SIGTERM)
                        # Wait a bit for graceful termination
                        try:
                            self.server_process.wait(timeout=2)
                        except subprocess.TimeoutExpired:
                            # Force kill if graceful termination failed
                            os.killpg(
                                os.getpgid(self.server_process.pid), signal.SIGKILL
                            )
                    except (OSError, ProcessLookupError):
                        # Process or process group doesn't exist, try individual kill
                        try:
                            self.server_process.kill()
                        except ProcessLookupError:
                            pass  # Process already terminated
                else:
                    # Fallback: try to kill normally
                    self.server_process.kill()
                # Wait for process to terminate
                try:
                    self.server_process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    self.log.warning("Process did not terminate within timeout")

            # Ensure port is free
            kill_process_on_port(self.port)

            # Reset reference
            self.server_process = None
            self.log.info("Lemonade server terminated successfully")
        except Exception as e:
            self.log.error(f"Error terminating server process: {e}")
            # Reset reference even on error
            self.server_process = None

    def __del__(self):
        """Cleanup server process on deletion."""
        # Check if keep_alive attribute exists (might not if __init__ failed early)
        if hasattr(self, "keep_alive") and not self.keep_alive:
            self.terminate_server()
        elif hasattr(self, "server_process") and self.server_process:
            if hasattr(self, "log"):
                self.log.info("Not terminating server because keep_alive=True")

    def get_model_info(self, model_name: str) -> Dict[str, Any]:
        """
        Get information about a model from the server.

        Args:
            model_name: Name of the model

        Returns:
            Dict with model info including size_gb estimate
        """
        try:
            models_response = self.list_models()
            for model in models_response.get("data", []):
                if model.get("id", "").lower() == model_name.lower():
                    # Estimate size based on model name if not provided
                    size_gb = model.get(
                        "size_gb", self._estimate_model_size(model_name)
                    )
                    return {
                        "id": model.get("id"),
                        "size_gb": size_gb,
                        "downloaded": model.get("downloaded", False),
                    }

            # Model not found in list, provide estimate
            return {
                "id": model_name,
                "size_gb": self._estimate_model_size(model_name),
                "downloaded": False,
            }
        except Exception:
            # If we can't get info, provide conservative estimate
            return {
                "id": model_name,
                "size_gb": self._estimate_model_size(model_name),
                "downloaded": False,
            }

    def _estimate_model_size(self, model_name: str) -> float:
        """
        Estimate model size in GB based on model name.

        Args:
            model_name: Name of the model

        Returns:
            Estimated size in GB
        """
        model_lower = model_name.lower()

        # Look for billion parameter indicators
        if "70b" in model_lower or "72b" in model_lower:
            return 40.0  # ~40GB for 70B models
        elif "30b" in model_lower or "34b" in model_lower:
            return 20.0  # ~20GB for 30B models
        elif "13b" in model_lower or "14b" in model_lower:
            return 8.0  # ~8GB for 13B models
        elif "7b" in model_lower:
            return 5.0  # ~5GB for 7B models
        elif "3b" in model_lower:
            return 2.0  # ~2GB for 3B models
        elif "1b" in model_lower or "0.5b" in model_lower:
            return 1.0  # ~1GB for small models
        elif "embed" in model_lower:
            return 0.5  # Embedding models are usually small
        else:
            return 10.0  # Conservative default

    def _estimate_download_time(self, size_gb: float, mbps: int = 100) -> int:
        """
        Estimate download time in minutes.

        Args:
            size_gb: Size in gigabytes
            mbps: Connection speed in megabits per second

        Returns:
            Estimated time in minutes
        """
        # Convert GB to megabits: 1 GB = 8000 megabits
        megabits = size_gb * 8000
        # Time in seconds
        seconds = megabits / mbps
        # Convert to minutes and round up
        return int(seconds / 60) + 1
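
    # Worked example (illustrative): a 30B-class model is estimated at 20.0 GB,
    # i.e. 20.0 * 8000 = 160,000 megabits; at the default 100 Mbps that is
    # 1,600 seconds, so int(1600 / 60) + 1 = 27 minutes.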

    def cancel_download(self, model_name: str) -> bool:
        """
        Stop waiting for an ongoing model download.

        **IMPORTANT:** This only stops the client from waiting for the download.
        The server will continue downloading the model in the background.
        This limitation exists because the server's `/api/v1/pull` endpoint does not
        support cancellation.

        To truly cancel a download, you would need to:
        1. Stop the Lemonade server process, or
        2. Wait for server API to support download cancellation

        Args:
            model_name: Name of the model being downloaded

        Returns:
            True if waiting was stopped, False if download not found

        Example:
            # User initiates download
            client.load_model("large-model", auto_download=True)

            # In another thread, user wants to "cancel"
            client.cancel_download("large-model")
            # Client stops waiting, but server keeps downloading

        See Also:
            - get_active_downloads(): List downloads client is waiting for
            - Future: Server will support DELETE /api/v1/downloads/{id}
        """
        with self._downloads_lock:
            if model_name in self.active_downloads:
                task = self.active_downloads[model_name]
                task.cancel()
                self.log.warning(
                    f"Stopped waiting for {model_name} download. "
                    f"Note: Server continues downloading in background."
                )
                return True
            return False

    def get_active_downloads(self) -> List[DownloadTask]:
        """Get list of active download tasks."""
        with self._downloads_lock:
            return list(self.active_downloads.values())

    def _extract_error_info(self, error: Union[str, Dict, Exception]) -> Dict[str, Any]:
        """
        Extract structured error information from various error formats.

        Lemonade server returns errors in two formats:
        1. Structured: {"error": {"message": "...", "type": "not_found"}}
        2. Operation: {"status": "error", "message": "..."}

        Args:
            error: Error as string, dict, or exception

        Returns:
            Dict with normalized error info:
            - message: Error message text
            - type: Error type if available (e.g., "not_found")
            - code: Error code if available
            - is_structured: Whether error had type/code field

        Examples:
            # From exception
            info = self._extract_error_info(LemonadeClientError("Model not found"))
            # Returns: {"message": "Model not found", "type": None, ...}

            # From structured response
            response = {"error": {"message": "Not found", "type": "not_found"}}
            info = self._extract_error_info(response)
            # Returns: {"message": "Not found", "type": "not_found", ...}
        """
        result = {
            "message": "",
            "type": None,
            "code": None,
            "is_structured": False,
        }

        # Handle exception objects
        if isinstance(error, Exception):
            error = str(error)

        # Handle string errors
        if isinstance(error, str):
            result["message"] = error
            return result

        # Handle dict responses
        if isinstance(error, dict):
            # Format 1: {"error": {"message": "...", "type": "..."}}
            if "error" in error and isinstance(error["error"], dict):
                error_obj = error["error"]
                result["message"] = error_obj.get("message", "")
                result["type"] = error_obj.get("type")
                result["code"] = error_obj.get("code")
                result["is_structured"] = (
                    result["type"] is not None or result["code"] is not None
                )

            # Format 2: {"status": "error", "message": "..."}
            elif error.get("status") == "error":
                result["message"] = error.get("message", "")

            # Fallback: use the dict as string
            else:
                result["message"] = str(error)

        return result

    def _is_model_error(self, error: Union[str, Dict, Exception]) -> bool:
        """
        Check if an error is related to model not being loaded.

        Uses structured error types when available (e.g., type="not_found"),
        falls back to string matching for unstructured errors.

        Args:
            error: Error as string, dict, or exception

        Returns:
            True if this is a model loading error

        Examples:
            # Structured error (preferred)
            error = {"error": {"message": "...", "type": "not_found"}}
            is_model_error = self._is_model_error(error)  # Returns True

            # String error (fallback)
            is_model_error = self._is_model_error("model not loaded")  # Returns True
        """
        # Extract structured error info
        error_info = self._extract_error_info(error)

        # Check structured error type first (more reliable)
        error_type = error_info.get("type")
        if error_type:
            error_type_lower = error_type.lower()
            if error_type_lower in ["not_found", "model_not_found", "model_not_loaded"]:
                return True

        # Fallback to string matching for unstructured errors
        error_message = error_info.get("message") or ""
        error_message = error_message.lower()
        return any(
            phrase in error_message
            for phrase in [
                "model not loaded",
                "no model loaded",
                "model not found",
                "model is not loaded",
                "model does not exist",
                "model not available",
            ]
        )

    def _execute_with_auto_download(
        self, api_call: Callable, model: str, auto_download: bool = True
    ):
        """
        Execute an API call with auto-download retry logic.

        Args:
            api_call: Function to call (should raise exception if model not loaded)
            model: Model name
            auto_download: Whether to auto-download on model error

        Returns:
            Result of api_call()

        Raises:
            ModelDownloadCancelledError: If user cancels download
            InsufficientDiskSpaceError: If not enough disk space
            LemonadeClientError: If download/load fails
        """
        try:
            return api_call()
        except Exception as e:
            # Check if this is a model loading error and auto_download is enabled
            if auto_download and self._is_model_error(e):
                self.log.info(
                    f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model}' not loaded, "
                    f"attempting auto-download and load..."
                )

                # Load model with auto-download (includes prompt, validation, etc.)
                self.load_model(model, timeout=60, auto_download=True)

                # Retry the API call
                self.log.info(
                    f"{_emoji('🔄', '[RETRY]')} Retrying API call with model: {model}"
                )
                return api_call()

            # Re-raise original error
            raise
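
    # Pattern sketch (illustrative): callers build a zero-argument closure and
    # hand it to this helper, so a "model not loaded" failure triggers exactly
    # one download-and-retry cycle:
    #
    #     def _make_request():
    #         return requests.post(url, json=data, timeout=timeout).json()
    #
    #     result = self._execute_with_auto_download(_make_request, model)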

    def chat_completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_completion_tokens: Optional[int] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, List[str]]] = None,
        stream: bool = False,
        timeout: int = DEFAULT_REQUEST_TIMEOUT,
        logprobs: Optional[bool] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        auto_download: bool = True,
        **kwargs,
    ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
        """
        Call the chat completions endpoint.

        If the model is not loaded, it will be automatically downloaded and loaded.

        Args:
            model: The model to use for completion
            messages: List of conversation messages with 'role' and 'content'
            temperature: Controls randomness (higher = more random)
            max_completion_tokens: Maximum number of output tokens to generate (preferred)
            max_tokens: Maximum number of output tokens to generate
                (deprecated, use max_completion_tokens)
            stop: Sequences where generation should stop
            stream: Whether to stream the response
            timeout: Request timeout in seconds
            logprobs: Whether to include log probabilities
            tools: List of tools the model may call
            auto_download: Automatically download model if not available (default: True)
            **kwargs: Additional parameters to pass to the API

        Returns:
            For non-streaming: Dict with completion data
            For streaming: Generator yielding completion chunks

        Example response (non-streaming):
            {
                "id": "0",
                "object": "chat.completion",
                "created": 1742927481,
                "model": "model-name",
                "choices": [{
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "Response text here"
                    },
                    "finish_reason": "stop"
                }]
            }
        """
        # Handle max_tokens vs max_completion_tokens
        if max_completion_tokens is None and max_tokens is None:
            max_completion_tokens = 1000  # Default value
        elif max_completion_tokens is not None and max_tokens is not None:
            self.log.warning(
                "Both max_completion_tokens and max_tokens provided. Using max_completion_tokens."
            )
        elif max_tokens is not None:
            max_completion_tokens = max_tokens

        # Use the OpenAI client for streaming if requested
        if stream:
            return self._stream_chat_completions_with_openai(
                model=model,
                messages=messages,
                temperature=temperature,
                max_completion_tokens=max_completion_tokens,
                stop=stop,
                timeout=timeout,
                logprobs=logprobs,
                tools=tools,
                auto_download=auto_download,
                **kwargs,
            )

        # Note: self.base_url already includes /api/v1
        url = f"{self.base_url}/chat/completions"
        data = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_completion_tokens": max_completion_tokens,
            "stream": stream,
            **kwargs,
        }

        if stop:
            data["stop"] = stop

        if logprobs:
            data["logprobs"] = logprobs

        if tools:
            data["tools"] = tools

        # Helper function for the actual API call
        def _make_request():
            self.log.debug(f"Sending chat completion request to model: {model}")
            response = requests.post(
                url,
                json=data,
                headers={"Content-Type": "application/json"},
                timeout=timeout,
            )

            if response.status_code != 200:
                error_msg = (
                    f"Error in chat completions "
                    f"(status {response.status_code}): {response.text}"
                )
                self.log.error(error_msg)
                raise LemonadeClientError(error_msg)

            result = response.json()
            if "choices" in result and len(result["choices"]) > 0:
                token_count = len(
                    result["choices"][0].get("message", {}).get("content", "")
                )
                self.log.debug(
                    f"Chat completion successful. "
                    f"Approximate response length: {token_count} characters"
                )

            return result

        # Execute with auto-download retry logic
        try:
            return _make_request()
        except (requests.exceptions.RequestException, LemonadeClientError):
            # Use helper to handle auto-download and retry
            return self._execute_with_auto_download(_make_request, model, auto_download)
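
    # Usage sketch (illustrative, assuming a running server and a small model
    # from the MODELS table above):
    #
    #     resp = client.chat_completions(
    #         model="Qwen2.5-0.5B-Instruct-CPU",
    #         messages=[{"role": "user", "content": "Say hello."}],
    #         max_completion_tokens=64,
    #     )
    #     print(resp["choices"][0]["message"]["content"])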

    def _stream_chat_completions_with_openai(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_completion_tokens: int = 1000,
        stop: Optional[Union[str, List[str]]] = None,
        timeout: int = DEFAULT_REQUEST_TIMEOUT,
        logprobs: Optional[bool] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        auto_download: bool = True,
        **kwargs,
    ) -> Generator[Dict[str, Any], None, None]:
        """
        Stream chat completions using the OpenAI client.

        Returns chunks in the format:
            {
                "id": "...",
                "object": "chat.completion.chunk",
                "created": 1742927481,
                "model": "...",
                "choices": [{
                    "index": 0,
                    "delta": {
                        "role": "assistant",
                        "content": "..."
                    },
                    "finish_reason": null
                }]
            }
        """
        # Create a client just for this request
        client = OpenAI(
            base_url=self.base_url,
            api_key="lemonade",  # required, but unused
            timeout=timeout,
        )

        # Create request parameters
        request_params = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
            "max_completion_tokens": max_completion_tokens,
            "stream": True,
            **kwargs,
        }

        if stop:
            request_params["stop"] = stop

        if logprobs:
            request_params["logprobs"] = logprobs

        if tools:
            request_params["tools"] = tools

        try:
            # Use the client to stream responses
            self.log.debug(f"Starting streaming chat completion with model: {model}")
            stream = client.chat.completions.create(**request_params)

            # Convert OpenAI client responses to our format
            tokens_generated = 0
            for chunk in stream:
                tokens_generated += 1
                # Convert to dict format expected by our API
                yield {
                    "id": chunk.id,
                    "object": "chat.completion.chunk",
                    "created": chunk.created,
                    "model": chunk.model,
                    "choices": [
                        {
                            "index": choice.index,
                            "delta": {
                                "role": (
                                    choice.delta.role
                                    if hasattr(choice.delta, "role")
                                    and choice.delta.role
                                    else None
                                ),
                                "content": (
                                    choice.delta.content
                                    if hasattr(choice.delta, "content")
                                    and choice.delta.content
                                    else None
                                ),
                            },
                            "finish_reason": choice.finish_reason,
                        }
                        for choice in chunk.choices
                    ],
                }

            self.log.debug(
                f"Completed streaming chat completion. Generated {tokens_generated} tokens."
            )

        except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
            error_type = e.__class__.__name__
            error_msg = str(e)

            # Check if this is a model loading error and auto_download is enabled
            if auto_download and self._is_model_error(e):
                self.log.info(
                    f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model}' not loaded, "
                    f"attempting auto-download and load..."
                )
                try:
                    # Load model with auto-download (may take hours for large models)
                    self.load_model(model, timeout=60, auto_download=True)

                    # Retry streaming
                    self.log.info(
                        f"{_emoji('🔄', '[RETRY]')} Retrying streaming chat completion "
                        f"with model: {model}"
                    )
                    stream = client.chat.completions.create(**request_params)

                    tokens_generated = 0
                    for chunk in stream:
                        tokens_generated += 1
                        yield {
                            "id": chunk.id,
                            "object": "chat.completion.chunk",
                            "created": chunk.created,
                            "model": chunk.model,
                            "choices": [
                                {
                                    "index": choice.index,
                                    "delta": {
                                        "role": (
                                            choice.delta.role
                                            if hasattr(choice.delta, "role")
                                            and choice.delta.role
                                            else None
                                        ),
                                        "content": (
                                            choice.delta.content
                                            if hasattr(choice.delta, "content")
                                            and choice.delta.content
                                            else None
                                        ),
                                    },
                                    "finish_reason": choice.finish_reason,
                                }
                                for choice in chunk.choices
                            ],
                        }

                    self.log.debug(
                        f"Completed streaming chat completion. Generated {tokens_generated} tokens."
                    )
                    return

                except Exception as load_error:
                    self.log.error(f"Auto-download/load failed: {load_error}")
                    raise LemonadeClientError(
                        f"Model '{model}' not loaded and auto-load failed: {load_error}"
                    )

            # Re-raise original error
            self.log.error(f"OpenAI {error_type}: {error_msg}")
            raise LemonadeClientError(f"OpenAI {error_type}: {error_msg}")
        except Exception as e:
            self.log.error(f"Error using OpenAI client for streaming: {str(e)}")
            raise LemonadeClientError(f"Streaming request failed: {str(e)}")

    def completions(
        self,
        model: str,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 1000,
        stop: Optional[Union[str, List[str]]] = None,
        stream: bool = False,
        echo: bool = False,
        timeout: int = DEFAULT_REQUEST_TIMEOUT,
        logprobs: Optional[bool] = None,
        auto_download: bool = True,
        **kwargs,
    ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
        """
        Call the completions endpoint.

        If the model is not loaded, it will be automatically downloaded and loaded.

        Args:
            model: The model to use for completion
            prompt: The prompt to generate a completion for
            temperature: Controls randomness (higher = more random)
            max_tokens: Maximum number of tokens to generate (including input tokens)
            stop: Sequences where generation should stop
            stream: Whether to stream the response
            echo: Whether to include the prompt in the response
            timeout: Request timeout in seconds
            logprobs: Whether to include log probabilities
            auto_download: Automatically download model if not available (default: True)
            **kwargs: Additional parameters to pass to the API

        Returns:
            For non-streaming: Dict with completion data
            For streaming: Generator yielding completion chunks

        Example response:
            {
                "id": "0",
                "object": "text_completion",
                "created": 1742927481,
                "model": "model-name",
                "choices": [{
                    "index": 0,
                    "text": "Response text here",
                    "finish_reason": "stop"
                }]
            }
        """
        # Use the OpenAI client for streaming if requested
        if stream:
            return self._stream_completions_with_openai(
                model=model,
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_tokens,
                stop=stop,
                echo=echo,
                timeout=timeout,
                logprobs=logprobs,
                auto_download=auto_download,
                **kwargs,
            )

        # Note: self.base_url already includes /api/v1
        url = f"{self.base_url}/completions"
        data = {
            "model": model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            "echo": echo,
            **kwargs,
        }

        if stop:
            data["stop"] = stop

        if logprobs:
            data["logprobs"] = logprobs

        # Helper function for the actual API call
        def _make_request():
            self.log.debug(f"Sending text completion request to model: {model}")
            response = requests.post(
                url,
                json=data,
                headers={"Content-Type": "application/json"},
                timeout=timeout,
            )

            if response.status_code != 200:
                error_msg = f"Error in completions (status {response.status_code}): {response.text}"
                self.log.error(error_msg)
                raise LemonadeClientError(error_msg)

            result = response.json()
            if "choices" in result and len(result["choices"]) > 0:
                token_count = len(result["choices"][0].get("text", ""))
                self.log.debug(
                    f"Text completion successful. "
                    f"Approximate response length: {token_count} characters"
                )

            return result

        # Execute with auto-download retry logic
        try:
            return _make_request()
        except (requests.exceptions.RequestException, LemonadeClientError):
            # Use helper to handle auto-download and retry
            return self._execute_with_auto_download(_make_request, model, auto_download)

    def _stream_completions_with_openai(
        self,
        model: str,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 1000,
        stop: Optional[Union[str, List[str]]] = None,
        echo: bool = False,
        timeout: int = DEFAULT_REQUEST_TIMEOUT,
        logprobs: Optional[bool] = None,
        auto_download: bool = True,
        **kwargs,
    ) -> Generator[Dict[str, Any], None, None]:
        """
        Stream completions using the OpenAI client.

        Returns chunks in the format:
            {
                "id": "...",
                "object": "text_completion",
                "created": 1742927481,
                "model": "...",
                "choices": [{
                    "index": 0,
                    "text": "...",
                    "finish_reason": null
                }]
            }
        """
        client = OpenAI(
            base_url=self.base_url,
            api_key="lemonade",  # required, but unused
            timeout=timeout,
        )

        try:
            self.log.debug(f"Starting streaming text completion with model: {model}")
            # Create request parameters
            request_params = {
                "model": model,
                "prompt": prompt,
                "temperature": temperature,
                "max_tokens": max_tokens,
                "stop": stop,
                "echo": echo,
                "stream": True,
                **kwargs,
            }

            if logprobs is not None:
                request_params["logprobs"] = logprobs

            response = client.completions.create(**request_params)

            tokens_generated = 0
            for chunk in response:
                tokens_generated += 1
                yield chunk.model_dump()

            self.log.debug(
                f"Completed streaming text completion. Generated {tokens_generated} tokens."
            )

        except (openai.APIError, openai.APIConnectionError, openai.RateLimitError) as e:
            error_type = e.__class__.__name__
            self.log.error(f"OpenAI {error_type}: {str(e)}")
            raise LemonadeClientError(f"OpenAI {error_type}: {str(e)}")
        except Exception as e:
            self.log.error(f"Error in OpenAI completion streaming: {str(e)}")
            raise LemonadeClientError(f"Error in OpenAI completion streaming: {str(e)}")

    def embeddings(
        self,
        input_texts: Union[str, List[str]],
        model: Optional[str] = None,
        timeout: int = DEFAULT_REQUEST_TIMEOUT,
    ) -> Dict[str, Any]:
        """
        Generate embeddings for input text(s) using Lemonade server.

        Args:
            input_texts: Single string or list of strings to embed
            model: Embedding model to use (defaults to self.model or nomic-embed-text-v2)
            timeout: Request timeout in seconds

        Returns:
            Dict with 'data' containing list of embedding vectors
        """
        try:
            # Ensure input is a list
            if isinstance(input_texts, str):
                input_texts = [input_texts]

            # Use specified model or default
            embedding_model = model or self.model or "nomic-embed-text-v2"

            payload = {"model": embedding_model, "input": input_texts}

            url = f"{self.base_url}/embeddings"
            response = self._send_request("POST", url, data=payload, timeout=timeout)

            return response

        except Exception as e:
            self.log.error(f"Error generating embeddings: {str(e)}")
            raise LemonadeClientError(f"Error generating embeddings: {str(e)}")

    def list_models(self, show_all: bool = False) -> Dict[str, Any]:
        """
        List available models from the server.

        Args:
            show_all: If True, returns full catalog including models not yet downloaded.
                If False (default), returns only downloaded models.
                When True, response includes additional fields:
                - name: Human-readable model name
                - downloaded: Boolean indicating local availability
                - labels: Array of descriptive tags (e.g., "hot", "cpu", "hybrid")

        Returns:
            Dict containing the list of available models

        Examples:
            # List only downloaded models
            downloaded = client.list_models()

            # List full catalog for model discovery
            all_models = client.list_models(show_all=True)
            available = [m for m in all_models["data"] if not m.get("downloaded")]
        """
        url = f"{self.base_url}/models"
        if show_all:
            url += "?show_all=true"
        return self._send_request("get", url)

    def get_model_details(self, model_id: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific model.

        Args:
            model_id: The model identifier (e.g., "Qwen3-Coder-30B-GGUF")

        Returns:
            Dict containing model metadata:
            - id: Model identifier
            - created: Unix timestamp
            - object: Always "model"
            - owned_by: Attribution field
            - checkpoint: HuggingFace checkpoint reference
            - recipe: Framework/device specification (e.g., "oga-cpu", "oga-hybrid")

        Raises:
            LemonadeClientError: If model not found (404 error)

        Examples:
            # Get model checkpoint and recipe
            model = client.get_model_details("Qwen3-Coder-30B-GGUF")
            print(f"Checkpoint: {model['checkpoint']}")
            print(f"Recipe: {model['recipe']}")

            # Verify model exists before loading
            try:
                details = client.get_model_details(model_name)
                client.load_model(model_name)
            except LemonadeClientError as e:
                print(f"Model not found: {e}")
        """
        url = f"{self.base_url}/models/{model_id}"
        return self._send_request("get", url)

    def pull_model(
        self,
        model_name: str,
        checkpoint: Optional[str] = None,
        recipe: Optional[str] = None,
        reasoning: Optional[bool] = None,
        mmproj: Optional[str] = None,
        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
    ) -> Dict[str, Any]:
        """
        Install a model on the server.

        Args:
            model_name: Model name to install
            checkpoint: HuggingFace checkpoint to install (for registering new models)
            recipe: Lemonade API recipe to load the model with (for registering new models)
            reasoning: Whether the model is a reasoning model (for registering new models)
            mmproj: Multimodal Projector file for vision models (for registering new models)
            timeout: Request timeout in seconds (longer for model installation)

        Returns:
            Dict containing the status of the pull operation

        Raises:
            LemonadeClientError: If the model installation fails
        """
        self.log.info(f"Installing {model_name}")

        request_data = {"model_name": model_name}

        if checkpoint:
            request_data["checkpoint"] = checkpoint
        if recipe:
            request_data["recipe"] = recipe
        if reasoning is not None:
            request_data["reasoning"] = reasoning
        if mmproj:
            request_data["mmproj"] = mmproj

        url = f"{self.base_url}/pull"
        try:
            response = self._send_request("post", url, request_data, timeout=timeout)
            self.log.info(f"Installed {model_name} successfully: response={response}")
            return response
        except Exception as e:
            message = f"Failed to install {model_name}: {e}"
            self.log.error(message)
            raise LemonadeClientError(message)
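
    # Registration sketch (illustrative; the checkpoint and recipe values here
    # are placeholders, not verified identifiers):
    #
    #     client.pull_model(
    #         "my-custom-model",
    #         checkpoint="some-org/some-model-GGUF",
    #         recipe="oga-cpu",
    #     )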

    def pull_model_stream(
        self,
        model_name: str,
        checkpoint: Optional[str] = None,
        recipe: Optional[str] = None,
        reasoning: Optional[bool] = None,
        vision: Optional[bool] = None,
        embedding: Optional[bool] = None,
        reranking: Optional[bool] = None,
        mmproj: Optional[str] = None,
        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
    ) -> Generator[Dict[str, Any], None, None]:
        """
        Install a model on the server with streaming progress updates.

        This method streams Server-Sent Events (SSE) during the download,
        providing real-time progress information.

        Args:
            model_name: Model name to install
            checkpoint: HuggingFace checkpoint to install (for registering new models)
            recipe: Lemonade API recipe to load the model with (for registering new models)
            reasoning: Whether the model is a reasoning model (for registering new models)
            vision: Whether the model has vision capabilities (for registering new models)
            embedding: Whether the model is an embedding model (for registering new models)
            reranking: Whether the model is a reranking model (for registering new models)
            mmproj: Multimodal Projector file for vision models (for registering new models)
            timeout: Request timeout in seconds (longer for model installation)
            progress_callback: Optional callback function called with progress dict on each event.
                Signature: callback(event_type: str, data: dict) -> None
                event_type is one of: "progress", "complete", "error"

        Yields:
            Dict containing progress event data with fields:
            - For "progress" events: file, file_index, total_files, bytes_downloaded,
              bytes_total, percent
            - For "complete" events: file_index, total_files, percent (100)
            - For "error" events: error message

        Raises:
            LemonadeClientError: If the model installation fails

        Example:
            # Using as generator
            for event in client.pull_model_stream("Qwen3-0.6B-GGUF"):
                if event.get("event") == "progress":
                    print(f"Downloading: {event['percent']}%")

            # Using with callback
            def on_progress(event_type, data):
                if event_type == "progress":
                    print(f"{data['file']}: {data['percent']}%")

            for _ in client.pull_model_stream("Qwen3-0.6B-GGUF", progress_callback=on_progress):
                pass
        """
        self.log.info(f"Installing {model_name} with streaming progress")

        request_data = {"model_name": model_name, "stream": True}

        if checkpoint:
            request_data["checkpoint"] = checkpoint
        if recipe:
            request_data["recipe"] = recipe
        if reasoning is not None:
            request_data["reasoning"] = reasoning
        if vision is not None:
            request_data["vision"] = vision
        if embedding is not None:
            request_data["embedding"] = embedding
        if reranking is not None:
            request_data["reranking"] = reranking
        if mmproj:
            request_data["mmproj"] = mmproj

        url = f"{self.base_url}/pull"

        try:
            response = requests.post(
                url,
                json=request_data,
                headers={"Content-Type": "application/json"},
                timeout=timeout,
                stream=True,
            )

            if response.status_code != 200:
                error_msg = f"Error pulling model (status {response.status_code}): {response.text}"
                self.log.error(error_msg)
                raise LemonadeClientError(error_msg)

            # Parse SSE stream
            event_type = None
            received_complete = False
            try:
                for line in response.iter_lines(decode_unicode=True):
                    if not line:
                        continue

                    if line.startswith("event:"):
                        event_type = line[6:].strip()
                    elif line.startswith("data:"):
                        data_str = line[5:].strip()
                        try:
                            data = json.loads(data_str)
                            data["event"] = event_type or "progress"

                            # Call the progress callback if provided
                            if progress_callback:
                                progress_callback(event_type or "progress", data)

                            yield data

                            # Track complete event
                            if event_type == "complete":
                                received_complete = True

                            # Check for error event
                            if event_type == "error":
                                error_msg = data.get(
                                    "error", "Unknown error during model pull"
                                )
                                raise LemonadeClientError(error_msg)

                        except json.JSONDecodeError:
                            self.log.warning(f"Failed to parse SSE data: {data_str}")
                            continue
            except requests.exceptions.ChunkedEncodingError:
                # Connection closed by server - this is normal after complete event
                if not received_complete:
                    raise

            self.log.info(f"Installed {model_name} successfully via streaming")

        except requests.exceptions.RequestException as e:
            message = f"Failed to install {model_name}: {e}"
            self.log.error(message)
            raise LemonadeClientError(message)
|
|
1702
|
+
|
|
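For reference, the parser above pairs an `event:` line with the `data:` JSON payload that follows it. A minimal sketch of those framing rules applied to a hand-written stream; every field value below is illustrative, not captured from a real server:

```python
import json

# Illustrative SSE lines in the shape pull_model_stream() consumes.
sample_stream = [
    "event: progress",
    'data: {"file": "model.gguf", "file_index": 1, "total_files": 1,'
    ' "bytes_downloaded": 512, "bytes_total": 1024, "percent": 50}',
    "",
    "event: complete",
    'data: {"file_index": 1, "total_files": 1, "percent": 100}',
]

event_type = None
for line in sample_stream:
    if not line:  # blank lines separate SSE events
        continue
    if line.startswith("event:"):
        event_type = line[6:].strip()
    elif line.startswith("data:"):
        payload = json.loads(line[5:].strip())
        payload["event"] = event_type or "progress"
        print(payload)
```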
+    def delete_model(
+        self,
+        model_name: str,
+        timeout: int = DEFAULT_REQUEST_TIMEOUT,
+    ) -> Dict[str, Any]:
+        """
+        Delete a model from the server.
+
+        Args:
+            model_name: Model name to delete
+            timeout: Request timeout in seconds
+
+        Returns:
+            Dict containing the status of the delete operation
+
+        Raises:
+            LemonadeClientError: If the model deletion fails
+        """
+        self.log.info(f"Deleting {model_name}")
+
+        request_data = {"model_name": model_name}
+
+        url = f"{self.base_url}/delete"
+        try:
+            response = self._send_request("post", url, request_data, timeout=timeout)
+            self.log.info(f"Deleted {model_name} successfully: response={response}")
+            return response
+        except Exception as e:
+            message = f"Failed to delete {model_name}: {e}"
+            self.log.error(message)
+            raise LemonadeClientError(message)
+
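delete_model carries no usage example in its docstring; a minimal call sketch, assuming a running server and reusing the model id from the other examples in this file:

```python
from gaia.llm.lemonade_client import LemonadeClient, LemonadeClientError

client = LemonadeClient()
try:
    # Model id is illustrative; any installed model name works here.
    result = client.delete_model("Qwen3-0.6B-GGUF")
    print(result)
except LemonadeClientError as e:
    print(f"Delete failed: {e}")
```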
+    def ensure_model_downloaded(
+        self,
+        model_name: str,
+        show_progress: bool = True,
+        timeout: int = 7200,
+    ) -> bool:
+        """
+        Ensure a model is downloaded, downloading if necessary.
+
+        This method checks if the model is available on the server,
+        and if not, downloads it via the /api/v1/pull endpoint.
+
+        Large models can be 100GB+ and take hours to download on typical connections.
+
+        Args:
+            model_name: Model name to ensure is downloaded
+            show_progress: Show progress messages during download
+            timeout: Download timeout in seconds (default: 7200 = 2 hours)
+
+        Returns:
+            True if model is available (was already downloaded or successfully downloaded),
+            False if download failed
+
+        Example:
+            client = LemonadeClient()
+            if client.ensure_model_downloaded("Qwen3-0.6B-GGUF"):
+                client.load_model("Qwen3-0.6B-GGUF")
+        """
+        try:
+            # Check if model is already downloaded
+            models_response = self.list_models()
+            for model in models_response.get("data", []):
+                if model.get("id") == model_name:
+                    if model.get("downloaded", False):
+                        if show_progress:
+                            self.log.info(
+                                f"{_emoji('✅', '[OK]')} Model already downloaded: {model_name}"
+                            )
+                        return True
+
+            # Model not downloaded - attempt download
+            if show_progress:
+                self.log.info(
+                    f"{_emoji('📥', '[DOWNLOADING]')} Downloading model: {model_name}"
+                )
+                self.log.info(
+                    " This may take minutes to hours depending on model size..."
+                )
+
+            # Download via pull_model
+            self.pull_model(model_name, timeout=timeout)
+
+            # Use the centralized download waiter
+            return self._wait_for_model_download(
+                model_name, timeout=timeout, show_progress=show_progress
+            )
+
+        except Exception as e:
+            self.log.error(f"Failed to ensure model downloaded: {e}")
+            return False
+
+    def responses(
+        self,
+        model: str,
+        input: Union[str, List[Dict[str, str]]],
+        temperature: float = 0.7,
+        max_output_tokens: Optional[int] = None,
+        stream: bool = False,
+        timeout: int = DEFAULT_REQUEST_TIMEOUT,
+        **kwargs,
+    ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
+        """
+        Call the responses endpoint.
+
+        Args:
+            model: The model to use for the response
+            input: A string or list of dictionaries input for the model to respond to
+            temperature: Controls randomness (higher = more random)
+            max_output_tokens: Maximum number of output tokens to generate
+            stream: Whether to stream the response
+            timeout: Request timeout in seconds
+            **kwargs: Additional parameters to pass to the API
+
+        Returns:
+            For non-streaming: Dict with response data
+            For streaming: Generator yielding response events
+
+        Example response (non-streaming):
+            {
+                "id": "0",
+                "created_at": 1746225832.0,
+                "model": "model-name",
+                "object": "response",
+                "output": [{
+                    "id": "0",
+                    "content": [{
+                        "annotations": [],
+                        "text": "Response text here"
+                    }]
+                }]
+            }
+        """
+        # Note: self.base_url already includes /api/v1
+        url = f"{self.base_url}/responses"
+        data = {
+            "model": model,
+            "input": input,
+            "temperature": temperature,
+            "stream": stream,
+            **kwargs,
+        }
+
+        if max_output_tokens:
+            data["max_output_tokens"] = max_output_tokens
+
+        try:
+            self.log.debug(f"Sending responses request to model: {model}")
+            response = requests.post(
+                url,
+                json=data,
+                headers={"Content-Type": "application/json"},
+                timeout=timeout,
+            )
+
+            if response.status_code != 200:
+                error_msg = f"Error in responses (status {response.status_code}): {response.text}"
+                self.log.error(error_msg)
+                raise LemonadeClientError(error_msg)
+
+            if stream:
+                # For streaming responses, we need to handle server-sent events
+                # This is a simplified implementation - full SSE parsing might be needed
+                return self._parse_sse_stream(response)
+            else:
+                result = response.json()
+                if "output" in result and len(result["output"]) > 0:
+                    content = result["output"][0].get("content", [])
+                    if content and len(content) > 0:
+                        text_length = len(content[0].get("text", ""))
+                        self.log.debug(
+                            f"Response successful. "
+                            f"Approximate response length: {text_length} characters"
+                        )
+                return result
+
+        except requests.exceptions.RequestException as e:
+            self.log.error(f"Request failed: {str(e)}")
+            raise LemonadeClientError(f"Request failed: {str(e)}")
+
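A call sketch matching the example response shape documented above; the model id and prompt are illustrative:

```python
from gaia.llm.lemonade_client import LemonadeClient

client = LemonadeClient()
result = client.responses(
    model="Qwen3-0.6B-GGUF",  # illustrative model id
    input="Write a haiku about lemons.",
    max_output_tokens=128,
)
# Index into the documented response shape to get the generated text.
print(result["output"][0]["content"][0]["text"])
```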
+    def _parse_sse_stream(self, response) -> Generator[Dict[str, Any], None, None]:
+        """
+        Parse server-sent events from streaming responses endpoint.
+
+        This is a simplified implementation that may need enhancement
+        for full SSE specification compliance.
+        """
+        for line in response.iter_lines(decode_unicode=True):
+            if line.startswith("data: "):
+                try:
+                    data = line[6:]  # Remove "data: " prefix
+                    if data.strip() == "[DONE]":
+                        break
+                    yield json.loads(data)
+                except json.JSONDecodeError:
+                    continue
+
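Note that the framing this helper expects differs from the pull stream earlier: only `data: ` lines, terminated by a literal `[DONE]` sentinel. A sketch of the same rules on hand-written input (the payloads are illustrative):

```python
import json

lines = [
    'data: {"id": "0", "object": "response.chunk"}',  # illustrative payloads
    'data: {"id": "1", "object": "response.chunk"}',
    "data: [DONE]",
]
for line in lines:
    if line.startswith("data: "):
        body = line[6:]
        if body.strip() == "[DONE]":
            break
        print(json.loads(body))
```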
+    def _wait_for_model_download(
+        self,
+        model_name: str,
+        timeout: int = 7200,
+        show_progress: bool = True,
+        download_task: Optional[DownloadTask] = None,
+    ) -> bool:
+        """
+        Wait for a model download to complete by polling the models endpoint.
+
+        Large models (up to 100GB) can take hours to download on typical connections:
+        - 100GB @ 100Mbps = ~2-3 hours
+        - 100GB @ 1Gbps = ~15-20 minutes
+
+        Args:
+            model_name: Model name to wait for
+            timeout: Maximum time to wait in seconds (default: 7200 = 2 hours)
+            show_progress: Show progress messages
+            download_task: Optional DownloadTask for cancellation support
+
+        Returns:
+            True if model download completed, False if timeout or error
+
+        Raises:
+            ModelDownloadCancelledError: If download is cancelled
+        """
+        poll_interval = 30  # Check every 30 seconds for large downloads
+        elapsed = 0
+
+        while elapsed < timeout:
+            # Check for cancellation
+            if download_task and download_task.is_cancelled():
+                if show_progress:
+                    self.log.warning(
+                        f"{_emoji('🚫', '[CANCELLED]')} Download cancelled for {model_name}"
+                    )
+                raise ModelDownloadCancelledError(f"Download cancelled: {model_name}")
+
+            time.sleep(poll_interval)
+            elapsed += poll_interval
+
+            try:
+                # Check if model is now downloaded
+                models_response = self.list_models()
+                for model in models_response.get("data", []):
+                    if model.get("id") == model_name:
+                        if model.get("downloaded", False):
+                            if show_progress:
+                                minutes = elapsed // 60
+                                seconds = elapsed % 60
+                                self.log.info(
+                                    f"{_emoji('✅', '[OK]')} Model downloaded successfully: "
+                                    f"{model_name} ({minutes}m {seconds}s)"
+                                )
+                            return True
+
+                if show_progress and elapsed % 60 == 0:  # Show every 60s
+                    minutes = elapsed // 60
+                    self.log.info(
+                        f" {_emoji('⏳', '[WAIT]')} Downloading... {minutes} minutes elapsed"
+                    )
+            except ModelDownloadCancelledError:
+                raise  # Re-raise cancellation
+            except Exception as e:
+                self.log.warning(f"Error checking download status: {e}")
+
+        # Timeout reached
+        if show_progress:
+            minutes = timeout // 60
+            self.log.warning(
+                f"{_emoji('⏰', '[TIMEOUT]')} Download timeout ({minutes} minutes) "
+                f"reached for {model_name}"
+            )
+        return False
+
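The ETA figures in the docstring follow from size × 8 / bandwidth; a quick arithmetic check (pure transfer time, ignoring protocol and disk overhead, which pushes real durations toward the upper ends quoted):

```python
# transfer_seconds = gigabytes * 8 / gigabits_per_second
for label, gbps in [("100 Mbps", 0.1), ("1 Gbps", 1.0)]:
    seconds = 100 * 8 / gbps
    print(f"100 GB @ {label}: ~{seconds / 3600:.1f} h ({seconds / 60:.0f} min)")
# -> ~2.2 h at 100 Mbps, ~13 min at 1 Gbps
```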
+    def load_model(
+        self,
+        model_name: str,
+        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
+        auto_download: bool = False,
+        download_timeout: int = 7200,
+        llamacpp_args: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Load a model on the server.
+
+        If auto_download is enabled and the model is not available:
+        1. Prompts user for confirmation (with size and ETA)
+        2. Validates disk space
+        3. Downloads model with cancellation support
+        4. Retries loading
+
+        Args:
+            model_name: Model name to load
+            timeout: Request timeout in seconds (longer for model loading)
+            auto_download: If True, automatically download the model if not available
+            download_timeout: Timeout for model download in seconds (default: 7200 = 2 hours)
+                Large models can be 100GB+ and take hours to download
+            llamacpp_args: Optional llama.cpp arguments (e.g., "--ubatch-size 2048").
+                Used to configure model loading parameters like batch sizes.
+
+        Returns:
+            Dict containing the status of the load operation
+
+        Raises:
+            ModelDownloadCancelledError: If user declines download or cancels
+            InsufficientDiskSpaceError: If not enough disk space
+            LemonadeClientError: If model loading fails
+        """
+        self.log.info(f"Loading {model_name}")
+
+        request_data = {"model_name": model_name}
+        if llamacpp_args:
+            request_data["llamacpp_args"] = llamacpp_args
+        url = f"{self.base_url}/load"
+
+        try:
+            response = self._send_request("post", url, request_data, timeout=timeout)
+            self.log.info(f"Loaded {model_name} successfully: response={response}")
+            self.model = model_name
+            return response
+        except Exception as e:
+            original_error = str(e)
+
+            # Check if this is a "model not found" error and auto_download is enabled
+            if not (auto_download and self._is_model_error(e)):
+                # Not a model error or auto_download disabled - re-raise
+                self.log.error(f"Failed to load {model_name}: {original_error}")
+                if isinstance(e, LemonadeClientError):
+                    raise
+                raise LemonadeClientError(
+                    f"Failed to load {model_name}: {original_error}"
+                )
+
+            # Auto-download flow
+            self.log.info(
+                f"{_emoji('📥', '[AUTO-DOWNLOAD]')} Model '{model_name}' not found, "
+                f"initiating auto-download..."
+            )
+
+            # Get model info and size estimate
+            model_info = self.get_model_info(model_name)
+            size_gb = model_info["size_gb"]
+            estimated_minutes = self._estimate_download_time(size_gb)
+
+            # Prompt user for confirmation
+            if not _prompt_user_for_download(model_name, size_gb, estimated_minutes):
+                raise ModelDownloadCancelledError(
+                    f"User declined download of {model_name}"
+                )
+
+            # Validate disk space
+            _check_disk_space(size_gb)
+
+            # Create and track download task
+            download_task = DownloadTask(model_name=model_name, size_gb=size_gb)
+            with self._downloads_lock:
+                self.active_downloads[model_name] = download_task
+
+            try:
+                # Trigger model download
+                self.pull_model(model_name, timeout=download_timeout)
+
+                # Wait for download to complete (with cancellation support)
+                self.log.info(
+                    f" {_emoji('⏳', '[WAIT]')} Waiting for model download to complete..."
+                )
+                self.log.info(
+                    f" {_emoji('💡', '[TIP]')} Tip: You can cancel with "
+                    f"client.cancel_download(model_name)"
+                )
+
+                if self._wait_for_model_download(
+                    model_name,
+                    timeout=download_timeout,
+                    show_progress=True,
+                    download_task=download_task,
+                ):
+                    # Retry loading after successful download
+                    self.log.info(
+                        f"{_emoji('🔄', '[RETRY]')} Retrying model load: {model_name}"
+                    )
+                    response = self._send_request(
+                        "post", url, request_data, timeout=timeout
+                    )
+                    self.log.info(
+                        f"{_emoji('✅', '[OK]')} Loaded {model_name} successfully after download"
+                    )
+                    self.model = model_name
+                    return response
+                else:
+                    raise LemonadeClientError(
+                        f"Model download timed out for '{model_name}'"
+                    )
+
+            except ModelDownloadCancelledError:
+                self.log.warning(f"Download cancelled for {model_name}")
+                raise
+            except InsufficientDiskSpaceError:
+                self.log.error(f"Insufficient disk space for {model_name}")
+                raise
+            except Exception as download_error:
+                self.log.error(f"Auto-download failed: {download_error}")
+                raise LemonadeClientError(
+                    f"Failed to auto-download '{model_name}': {download_error}"
+                )
+            finally:
+                # Clean up download task
+                with self._downloads_lock:
+                    self.active_downloads.pop(model_name, None)
+
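A usage sketch of the auto-download path described above, assuming the exception classes used in this file are importable from the same module (the model id is illustrative):

```python
from gaia.llm.lemonade_client import (
    LemonadeClient,
    LemonadeClientError,
    ModelDownloadCancelledError,
)

client = LemonadeClient()
try:
    # Triggers the confirm / disk-check / pull / retry flow if missing.
    client.load_model("Qwen3-0.6B-GGUF", auto_download=True)
except ModelDownloadCancelledError:
    print("Download declined or cancelled")
except LemonadeClientError as e:
    print(f"Load failed: {e}")
```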
+    def unload_model(self) -> Dict[str, Any]:
+        """
+        Unload the current model from the server.
+
+        Returns:
+            Dict containing the status of the unload operation
+        """
+        url = f"{self.base_url}/unload"
+        response = self._send_request("post", url)
+        self.model = None
+        self.log.info(f"Model unloaded successfully: {response}")
+        return response
+
+    def set_params(
+        self,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        top_k: Optional[int] = None,
+        min_length: Optional[int] = None,
+        max_length: Optional[int] = None,
+        do_sample: Optional[bool] = None,
+    ) -> Dict[str, Any]:
+        """
+        Set generation parameters for text completion.
+
+        Args:
+            temperature: Controls randomness (higher = more random)
+            top_p: Controls diversity via nucleus sampling
+            top_k: Controls diversity by limiting to k most likely tokens
+            min_length: Minimum length of generated text in tokens
+            max_length: Maximum length of generated text in tokens
+            do_sample: Whether to use sampling or greedy decoding
+
+        Returns:
+            Dict containing the status and updated parameters
+        """
+        request_data = {}
+
+        if temperature is not None:
+            request_data["temperature"] = temperature
+        if top_p is not None:
+            request_data["top_p"] = top_p
+        if top_k is not None:
+            request_data["top_k"] = top_k
+        if min_length is not None:
+            request_data["min_length"] = min_length
+        if max_length is not None:
+            request_data["max_length"] = max_length
+        if do_sample is not None:
+            request_data["do_sample"] = do_sample
+
+        url = f"{self.base_url}/params"
+        return self._send_request("post", url, request_data)
+
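Since set_params only sends the parameters you pass, partial updates are possible; a minimal sketch (the values are illustrative):

```python
from gaia.llm.lemonade_client import LemonadeClient

client = LemonadeClient()
# Unset parameters are omitted from the request and left unchanged.
result = client.set_params(temperature=0.2, top_k=40, max_length=512)
print(result)
```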
+    def health_check(self) -> Dict[str, Any]:
+        """
+        Check server health.
+
+        Returns:
+            Dict containing the server status and loaded model
+
+        Raises:
+            LemonadeClientError: If the health check fails
+        """
+        url = f"{self.base_url}/health"
+        return self._send_request("get", url)
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        Get performance statistics from the last request.
+
+        Returns:
+            Dict containing performance statistics
+        """
+        url = f"{self.base_url}/stats"
+        return self._send_request("get", url)
+
+    def get_system_info(self, verbose: bool = False) -> Dict[str, Any]:
+        """
+        Get system hardware information and device enumeration.
+
+        Args:
+            verbose: If True, returns additional details like Python packages
+                and extended system information
+
+        Returns:
+            Dict containing system information:
+            - OS Version
+            - Processor details
+            - Physical Memory (RAM)
+            - devices: Dictionary with device information
+                - cpu: Name, cores, threads, availability
+                - gpu: AMD iGPU/dGPU name, memory (MB), driver version, availability
+                - npu: Name, driver version, power mode, availability
+
+        Examples:
+            # Check available devices
+            sysinfo = client.get_system_info()
+            devices = sysinfo.get("devices", {})
+
+            # Select best device
+            if devices.get("npu", {}).get("available"):
+                print("Using NPU for acceleration")
+            elif devices.get("gpu", {}).get("available"):
+                print("Using GPU for acceleration")
+            else:
+                print("Using CPU")
+
+            # Get detailed info
+            detailed = client.get_system_info(verbose=True)
+        """
+        url = f"{self.base_url}/system-info"
+        if verbose:
+            url += "?verbose=true"
+        return self._send_request("get", url)
+
+    def ready(self) -> bool:
+        """
+        Check if the client is ready for use.
+
+        Returns:
+            bool: True if the client exists and the server is healthy, False otherwise
+        """
+        try:
+            # Check if client exists and server is healthy
+            health = self.health_check()
+            return health.get("status") == "ok"
+        except Exception:
+            return False
+
+    def validate_context_size(
+        self,
+        required_tokens: int = 32768,
+        quiet: bool = False,
+    ) -> tuple:
+        """
+        Validate that Lemonade server has sufficient context size.
+
+        Checks the /health endpoint to verify the server's context size
+        meets the required minimum.
+
+        Args:
+            required_tokens: Minimum required context size in tokens (default: 32768)
+            quiet: Suppress output messages
+
+        Returns:
+            Tuple of (success: bool, error_message: Optional[str])
+            - success: True if context size is sufficient
+            - error_message: Description of the issue if validation failed, None if successful
+
+        Example:
+            client = LemonadeClient()
+            success, error = client.validate_context_size(required_tokens=32768)
+            if not success:
+                print(f"Context validation failed: {error}")
+                sys.exit(1)
+        """
+        try:
+            health = self.health_check()
+            reported_ctx = health.get("context_size", 0)
+
+            if reported_ctx >= required_tokens:
+                self.log.debug(
+                    f"Context size validated: {reported_ctx} >= {required_tokens}"
+                )
+                return True, None
+            else:
+                error_msg = (
+                    f"Insufficient context size: server has {reported_ctx} tokens, "
+                    f"but {required_tokens} tokens are required. "
+                    f"Restart with: lemonade-server serve --ctx-size {required_tokens}"
+                )
+                if not quiet:
+                    print(f"❌ {error_msg}")
+                return False, error_msg
+
+        except Exception as e:
+            self.log.warning(f"Context validation failed: {e}")
+            if not quiet:
+                print(f"⚠️ Context validation failed: {e}")
+            return True, None  # Don't block on connection errors
+
+    def get_status(self) -> LemonadeStatus:
+        """
+        Get comprehensive Lemonade status.
+
+        Returns:
+            LemonadeStatus with server status and loaded models
+        """
+        status = LemonadeStatus(url=f"http://{self.host}:{self.port}")
+
+        try:
+            health = self.health_check()
+            status.running = True
+            status.health_data = health
+            status.context_size = health.get("context_size", 0)
+
+            # Get loaded models
+            models_response = self.list_models()
+            status.loaded_models = models_response.get("data", [])
+        except Exception as e:
+            self.log.debug(f"Failed to get status: {e}")
+            status.running = False
+            status.error = str(e)
+
+        return status
+
+    def get_agent_profile(self, agent: str) -> Optional[AgentProfile]:
+        """
+        Get agent profile by name.
+
+        Args:
+            agent: Name of the agent (chat, code, rag, talk, blender, etc.)
+
+        Returns:
+            AgentProfile if found, None otherwise
+        """
+        return AGENT_PROFILES.get(agent.lower())
+
+    def list_agents(self) -> List[str]:
+        """
+        List all available agent profiles.
+
+        Returns:
+            List of agent profile names
+        """
+        return list(AGENT_PROFILES.keys())
+
+    def get_required_models(self, agent: str = "all") -> List[str]:
+        """
+        Get list of model IDs required for an agent or all agents.
+
+        Args:
+            agent: Agent name or "all" for all unique models
+
+        Returns:
+            List of model IDs (e.g., ["Qwen3-Coder-30B-A3B-Instruct-GGUF", ...])
+        """
+        model_ids = set()
+
+        if agent.lower() == "all":
+            # Collect all unique models across all agents
+            for profile in AGENT_PROFILES.values():
+                for model_key in profile.models:
+                    if model_key in MODELS:
+                        model_ids.add(MODELS[model_key].model_id)
+        else:
+            # Get models for specific agent
+            profile = self.get_agent_profile(agent)
+            if profile:
+                for model_key in profile.models:
+                    if model_key in MODELS:
+                        model_ids.add(MODELS[model_key].model_id)
+
+        return list(model_ids)
+
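A sketch tying the three profile helpers together; the agent name "chat" is illustrative, and list_agents() reports the real set:

```python
from gaia.llm.lemonade_client import LemonadeClient

client = LemonadeClient()
print(client.list_agents())                # all profile names
print(client.get_required_models("chat"))  # models for one profile
print(client.get_required_models("all"))   # union across every profile
```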
+    def check_model_available(self, model_id: str) -> bool:
+        """
+        Check if a model is available (downloaded) on the server.
+
+        Args:
+            model_id: Model ID to check
+
+        Returns:
+            True if model is available, False otherwise
+        """
+        try:
+            # Use list_models with show_all=True to get download status
+            models = self.list_models(show_all=True)
+            for model in models.get("data", []):
+                if model.get("id", "").lower() == model_id.lower():
+                    return model.get("downloaded", False)
+        except Exception:
+            pass
+        return False
+
+    def download_agent_models(
+        self,
+        agent: str = "all",
+        timeout: int = DEFAULT_MODEL_LOAD_TIMEOUT,
+        progress_callback: Optional[Callable[[str, Dict[str, Any]], None]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Download all models required for an agent with streaming progress.
+
+        This method downloads all models needed by an agent (or all agents)
+        and provides real-time progress updates via SSE streaming.
+
+        Args:
+            agent: Agent name (chat, code, rag, etc.) or "all" for all models
+            timeout: Timeout per model in seconds
+            progress_callback: Optional callback for progress updates.
+                Signature: callback(event_type: str, data: dict) -> None
+
+        Returns:
+            Dict with download results:
+            - success: bool - True if all models downloaded
+            - models: List[Dict] - Status for each model
+            - errors: List[str] - Any error messages
+
+        Example:
+            def on_progress(event_type, data):
+                if event_type == "progress":
+                    print(f"{data['file']}: {data['percent']}%")
+
+            result = client.download_agent_models("chat", progress_callback=on_progress)
+        """
+        model_ids = self.get_required_models(agent)
+
+        if not model_ids:
+            return {
+                "success": True,
+                "models": [],
+                "errors": [],
+                "message": f"No models required for agent '{agent}'",
+            }
+
+        results = {"success": True, "models": [], "errors": []}
+
+        for model_id in model_ids:
+            model_result = {"model_id": model_id, "status": "pending", "skipped": False}
+
+            # Check if already available
+            if self.check_model_available(model_id):
+                model_result["status"] = "already_available"
+                model_result["skipped"] = True
+                results["models"].append(model_result)
+                self.log.info(f"Model {model_id} already available, skipping download")
+                continue
+
+            # Download with streaming
+            try:
+                self.log.info(f"Downloading model: {model_id}")
+                completed = False
+
+                for event in self.pull_model_stream(
+                    model_name=model_id,
+                    timeout=timeout,
+                    progress_callback=progress_callback,
+                ):
+                    if event.get("event") == "complete":
+                        completed = True
+                        model_result["status"] = "completed"
+                    elif event.get("event") == "error":
+                        model_result["status"] = "error"
+                        model_result["error"] = event.get("error", "Unknown error")
+                        results["errors"].append(f"{model_id}: {model_result['error']}")
+                        results["success"] = False
+
+                if not completed and model_result["status"] == "pending":
+                    model_result["status"] = "completed"  # No explicit complete event
+
+            except LemonadeClientError as e:
+                model_result["status"] = "error"
+                model_result["error"] = str(e)
+                results["errors"].append(f"{model_id}: {e}")
+                results["success"] = False
+
+            results["models"].append(model_result)
+
+        return results
+
+    def check_model_loaded(self, model_id: str) -> bool:
+        """
+        Check if a specific model is loaded.
+
+        Args:
+            model_id: Model ID to check
+
+        Returns:
+            True if model is loaded, False otherwise
+        """
+        try:
+            models_response = self.list_models()
+            for model in models_response.get("data", []):
+                if model.get("id", "").lower() == model_id.lower():
+                    return True
+                # Also check for partial match
+                if model_id.lower() in model.get("id", "").lower():
+                    return True
+        except Exception:
+            pass
+        return False
+
+    def _check_lemonade_installed(self) -> bool:
+        """
+        Check if lemonade-server is available.
+
+        Checks in this order:
+        1. Try health check on configured URL (LEMONADE_BASE_URL or default)
+        2. If localhost and health check fails, check if binary is in PATH (for auto-start)
+        3. If remote server and health check fails, return False (can't auto-start)
+
+        Returns:
+            True if server is available or can be started, False otherwise
+        """
+        # First, always try health check to see if server is already running
+        try:
+            health = self.health_check()
+            if health.get("status") == "ok":
+                return True
+        except Exception:
+            pass
+
+        # Health check failed - determine if we can auto-start
+        is_localhost = self.host in ("localhost", "127.0.0.1", "::1")
+
+        if is_localhost:
+            # Local server not running - check if binary is installed for auto-start
+            return shutil.which("lemonade-server") is not None
+        else:
+            # Remote server not responding and we can't auto-start it
+            return False
+
+    def get_lemonade_version(self) -> Optional[str]:
+        """
+        Get the installed lemonade-server version.
+
+        Returns:
+            Version string (e.g., "8.2.2") or None if unable to determine
+        """
+        try:
+            result = subprocess.run(
+                ["lemonade-server", "--version"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+                check=False,  # We handle errors by checking the output
+            )
+
+            # Combine stdout and stderr to get complete output
+            full_output = result.stdout + result.stderr
+
+            # Extract version number using regex (e.g., "8.2.2")
+            version_match = re.search(r"(\d+\.\d+(?:\.\d+)?)", full_output)
+            if version_match:
+                return version_match.group(1)
+
+            return None
+
+        except Exception:
+            return None
+
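The version regex accepts two- or three-part versions; a quick demonstration against representative CLI output (the strings are illustrative):

```python
import re

pattern = r"(\d+\.\d+(?:\.\d+)?)"  # same pattern the method uses
for output in ["lemonade-server 8.2.2", "version 8.2", "no digits here"]:
    match = re.search(pattern, output)
    print(repr(output), "->", match.group(1) if match else None)
# -> '8.2.2', '8.2', None
```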
+    def _check_version_compatibility(
+        self, expected_version: str, quiet: bool = False
+    ) -> bool:
+        """
+        Check if the installed lemonade-server version is compatible.
+
+        Checks only the major version for compatibility.
+
+        Args:
+            expected_version: Expected version string (e.g., "8.2.2")
+            quiet: Suppress warning output
+
+        Returns:
+            True if compatible (or version check failed), False if incompatible major version
+        """
+        actual_version = self.get_lemonade_version()
+
+        if not actual_version:
+            # Can't determine version, assume compatible (don't block)
+            return True
+
+        try:
+            # Parse versions
+            expected_parts = expected_version.split(".")
+            actual_parts = actual_version.split(".")
+
+            expected_major = int(expected_parts[0])
+            actual_major = int(actual_parts[0])
+
+            if expected_major != actual_major:
+                if not quiet:
+                    print("")
+                    print(
+                        f"{_emoji('⚠️', '[WARN]')} Lemonade Server version mismatch detected!"
+                    )
+                    print(f" Expected major version: {expected_major}.x.x")
+                    print(f" Installed version: {actual_version}")
+                    print("")
+                    print(
+                        " This may cause compatibility issues. "
+                        f"Please install Lemonade Server {expected_version}:"
+                    )
+                    print(" https://lemonade-server.ai")
+                    print("")
+
+                return False
+
+            return True
+
+        except Exception:
+            # If parsing fails, assume compatible (don't block)
+            return True
+
+    def initialize(
+        self,
+        agent: str = "mcp",
+        ctx_size: Optional[int] = None,
+        auto_start: bool = True,
+        timeout: int = 120,
+        verbose: bool = False,  # pylint: disable=unused-argument
+        quiet: bool = False,
+    ) -> LemonadeStatus:
+        """
+        Initialize Lemonade Server for a specific agent.
+
+        This method:
+        1. Checks if lemonade-server is installed
+        2. Checks if server is running (health endpoint)
+        3. Auto-starts with ctx-size=32768 if not running
+        4. Validates context size and shows warning if too small
+
+        With auto-download enabled, models are downloaded on-demand when needed,
+        so we don't validate model availability during initialization.
+
+        Args:
+            agent: Agent name (chat, code, rag, talk, blender, jira, docker, vlm, minimal, mcp)
+            ctx_size: Override context size (default: 32768 for most agents)
+            auto_start: Automatically start server if not running
+            timeout: Timeout in seconds for server startup
+            verbose: Enable verbose output
+            quiet: Suppress output (only errors)
+
+        Returns:
+            LemonadeStatus with server status and loaded models
+
+        Example:
+            client = LemonadeClient()
+            status = client.initialize(agent="chat")
+
+            # Initialize with custom context size
+            status = client.initialize(agent="code", ctx_size=65536)
+        """
+        profile = self.get_agent_profile(agent)
+        if not profile:
+            if not quiet:
+                print(
+                    f"{_emoji('⚠️', '[WARN]')} Unknown agent '{agent}', using 'mcp' profile"
+                )
+            profile = AGENT_PROFILES["mcp"]
+
+        # Use 32768 as default context size for all agents (suitable for most tasks)
+        # User can override with ctx_size parameter if needed
+        required_ctx = ctx_size or 32768
+
+        if not quiet:
+            print(f"🍋 Initializing Lemonade for {profile.display_name}")
+            print(f" Context size: {required_ctx}")
+
+        # Check if lemonade-server is installed
+        if not self._check_lemonade_installed():
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Lemonade Server is not installed")
+                print("")
+                print(f"{_emoji('📥', '[DOWNLOAD]')} Download and install from:")
+                print(" https://lemonade-server.ai")
+                print("")
+                print("GAIA will automatically start Lemonade Server once installed.")
+                print("")
+            status = LemonadeStatus(url=f"http://{self.host}:{self.port}")
+            status.running = False
+            status.error = "Lemonade Server not installed"
+            return status
+
+        # Check version compatibility (warning only, not fatal)
+        from gaia.version import LEMONADE_VERSION
+
+        self._check_version_compatibility(LEMONADE_VERSION, quiet=quiet)
+
+        # Check current status
+        status = self.get_status()
+
+        if status.running:
+            if not quiet:
+                print("✅ Lemonade Server is running")
+                print(f" Current context size: {status.context_size}")
+
+            # Check context size (warning only, not fatal)
+            if status.context_size < required_ctx:
+                if not quiet:
+                    print("")
+                    print(
+                        f"{_emoji('⚠️', '[WARN]')} Context size ({status.context_size}) "
+                        f"is less than recommended ({required_ctx})"
+                    )
+                    print(
+                        f" For better performance, restart with: "
+                        f"lemonade-server serve --ctx-size {required_ctx}"
+                    )
+                    print("")
+
+            return status
+
+        # Server not running
+        if not auto_start:
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Lemonade Server is not running")
+                print(f" Start with: lemonade-server serve --ctx-size {required_ctx}")
+            status.error = "Server not running"
+            return status
+
+        # Auto-start server
+        if not quiet:
+            print(
+                f"{_emoji('🚀', '[START]')} Starting Lemonade Server "
+                f"with ctx-size={required_ctx}..."
+            )
+
+        try:
+            self.launch_server(ctx_size=required_ctx, background="terminal")
+
+            # Wait for server to be ready
+            start_time = time.time()
+            while time.time() - start_time < timeout:
+                try:
+                    health = self.health_check()
+                    if health.get("status") == "ok":
+                        if not quiet:
+                            print(
+                                f"{_emoji('✅', '[OK]')} Lemonade Server started successfully"
+                            )
+                        status = self.get_status()
+                        status.running = True
+                        return status
+                except Exception:
+                    pass
+                time.sleep(2)
+
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Failed to start Lemonade Server")
+            status.error = "Failed to start server"
+        except Exception as e:
+            self.log.error(f"Failed to start server: {e}")
+            if not quiet:
+                print(f"{_emoji('❌', '[ERROR]')} Failed to start Lemonade Server: {e}")
+            status.error = str(e)
+
+        return status
+
+    def _send_request(
+        self,
+        method: str,
+        url: str,
+        data: Optional[Dict[str, Any]] = None,
+        timeout: int = DEFAULT_REQUEST_TIMEOUT,
+    ) -> Dict[str, Any]:
+        """
+        Send a request to the server and return the response.
+
+        Args:
+            method: HTTP method (get, post, etc.)
+            url: URL to send the request to
+            data: Request payload
+            timeout: Request timeout in seconds
+
+        Returns:
+            Response as a dict
+
+        Raises:
+            LemonadeClientError: If the request fails
+        """
+        try:
+            headers = {"Content-Type": "application/json"}
+
+            if method.lower() == "get":
+                response = requests.get(url, headers=headers, timeout=timeout)
+            elif method.lower() == "post":
+                response = requests.post(
+                    url, json=data, headers=headers, timeout=timeout
+                )
+            else:
+                raise LemonadeClientError(f"Unsupported HTTP method: {method}")
+
+            if response.status_code >= 400:
+                raise LemonadeClientError(
+                    f"Request failed with status {response.status_code}: {response.text}"
+                )
+
+            return response.json()
+
+        except requests.exceptions.RequestException as e:
+            raise LemonadeClientError(f"Request failed: {str(e)}")
+        except json.JSONDecodeError:
+            raise LemonadeClientError(
+                f"Failed to parse response as JSON: {response.text}"
+            )
+
+
+def create_lemonade_client(
+    model: Optional[str] = None,
+    host: Optional[str] = None,
+    port: Optional[int] = None,
+    auto_start: bool = False,
+    auto_load: bool = False,
+    auto_pull: bool = True,
+    verbose: bool = True,
+    background: str = "terminal",
+    keep_alive: bool = False,
+) -> LemonadeClient:
+    """
+    Factory function to create and configure a LemonadeClient instance.
+
+    This function provides a simplified way to create a LemonadeClient instance
+    with proper configuration from environment variables and/or explicit parameters.
+
+    Args:
+        model: Name of the model to use
+            (defaults to env var LEMONADE_MODEL or DEFAULT_MODEL_NAME)
+        host: Host address for the Lemonade server
+            (defaults to env var LEMONADE_HOST or DEFAULT_HOST)
+        port: Port number for the Lemonade server
+            (defaults to env var LEMONADE_PORT or DEFAULT_PORT)
+        auto_start: Automatically start the server
+        auto_load: Automatically load the model
+        auto_pull: Whether to automatically pull the model if it's not available
+            (when auto_load=True)
+        verbose: Whether to enable verbose logging
+        background: How to run the server if auto_start is True:
+            - "terminal": Launch in a new terminal window (default)
+            - "silent": Run in background with output to log file
+            - "none": Run in foreground
+        keep_alive: If True, don't terminate server when client is deleted
+
+    Returns:
+        A configured LemonadeClient instance
+    """
+    # Get configuration from environment variables with fallbacks to defaults
+    env_model = os.environ.get("LEMONADE_MODEL")
+    env_host = os.environ.get("LEMONADE_HOST")
+    env_port = os.environ.get("LEMONADE_PORT")
+
+    # Prioritize explicit parameters over environment variables over defaults
+    model_name = model or env_model or DEFAULT_MODEL_NAME
+    server_host = host or env_host or DEFAULT_HOST
+    server_port = port or (int(env_port) if env_port else DEFAULT_PORT)
+
+    # Create the client
+    client = LemonadeClient(
+        model=model_name,
+        host=server_host,
+        port=server_port,
+        verbose=verbose,
+        keep_alive=keep_alive,
+    )
+
+    # Auto-start server if requested
+    if auto_start:
+        try:
+            # Check if server is already running
+            try:
+                client.health_check()
+                client.log.info("Lemonade server is already running")
+            except LemonadeClientError:
+                # Server not running, start it
+                client.log.info(
+                    f"Starting Lemonade server at {server_host}:{server_port}"
+                )
+                client.launch_server(background=background)
+
+                # Perform a health check to verify the server is running
+                client.health_check()
+        except Exception as e:
+            client.log.error(f"Failed to start Lemonade server: {str(e)}")
+            raise LemonadeClientError(f"Failed to start Lemonade server: {str(e)}")
+
+    # Auto-load model if requested
+    if auto_load:
+        try:
+            # Check if auto_pull is enabled and model needs to be pulled first
+            if auto_pull:
+                # Check if model is available
+                models_response = client.list_models()
+                available_models = [
+                    model.get("id", "") for model in models_response.get("data", [])
+                ]
+
+                if model_name not in available_models:
+                    client.log.info(
+                        f"Model '{model_name}' not found in registry. "
+                        f"Available models: {available_models}"
+                    )
+                    client.log.info(
+                        f"Attempting to pull model '{model_name}' before loading..."
+                    )
+
+                    try:
+                        # Try to pull the model first
+                        pull_result = client.pull_model(
+                            model_name, timeout=300
+                        )  # 5 min timeout for download
+                        client.log.info(f"Successfully pulled model: {pull_result}")
+                    except Exception as pull_error:
+                        client.log.warning(
+                            f"Failed to pull model '{model_name}': {pull_error}"
+                        )
+                        client.log.info(
+                            "Proceeding with load anyway - server may auto-install"
+                        )
+                else:
+                    client.log.info(
+                        f"Model '{model_name}' found in registry, proceeding with load"
+                    )
+
+            # Now attempt to load the model
+            client.load_model(model_name, timeout=60)
+        except Exception as e:
+            # Extract detailed error information
+            error_details = str(e)
+            client.log.error(f"Failed to load {model_name}: {error_details}")
+
+            # Try to get more details about available models for debugging
+            try:
+                models_response = client.list_models()
+                available_models = [
+                    model.get("id", "unknown")
+                    for model in models_response.get("data", [])
+                ]
+                client.log.error(f"Available models: {available_models}")
+                client.log.error(f"Attempted to load: {model_name}")
+                if available_models:
+                    client.log.error(
+                        "Consider using one of the available models instead"
+                    )
+            except Exception as list_error:
+                client.log.error(f"Could not list available models: {list_error}")
+
+            # Include both original error and context in the raised exception
+            enhanced_message = f"Failed to load {model_name}: {error_details}"
+            if "available_models" in locals() and available_models:
+                enhanced_message += f" (Available models: {available_models})"
+
+            raise LemonadeClientError(enhanced_message)
+
+    return client
+
+
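A factory usage sketch; the model id is illustrative, and the LEMONADE_MODEL / LEMONADE_HOST / LEMONADE_PORT environment variables apply when the corresponding arguments are omitted:

```python
from gaia.llm.lemonade_client import create_lemonade_client

client = create_lemonade_client(
    model="Qwen3-0.6B-GGUF",  # illustrative; falls back to LEMONADE_MODEL
    auto_start=True,          # launch the server if it is not running
    auto_load=True,           # pull (auto_pull default) and load the model
    background="silent",      # log-file output instead of a new terminal
)
print(client.health_check())
```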
+def initialize_lemonade(
+    agent: str = "mcp",
+    ctx_size: Optional[int] = None,
+    auto_start: bool = True,
+    timeout: int = 120,
+    verbose: bool = False,
+    quiet: bool = False,
+    host: str = DEFAULT_HOST,
+    port: int = DEFAULT_PORT,
+) -> LemonadeStatus:
+    """
+    Convenience function to initialize Lemonade Server.
+
+    This is a simplified interface for initializing Lemonade with agent-specific
+    profiles. It creates a temporary client and runs initialization.
+
+    Args:
+        agent: Agent name (chat, code, rag, talk, blender, jira, docker, vlm, minimal, mcp)
+        ctx_size: Override context size
+        auto_start: Automatically start server if not running
+        timeout: Timeout for server startup
+        verbose: Enable verbose output
+        quiet: Suppress output
+        host: Lemonade server host
+        port: Lemonade server port
+
+    Returns:
+        LemonadeStatus with server status
+
+    Example:
+        from gaia.llm.lemonade_client import initialize_lemonade
+
+        # Initialize for chat agent
+        status = initialize_lemonade(agent="chat")
+
+        # Initialize for code agent with larger context
+        status = initialize_lemonade(agent="code", ctx_size=65536)
+    """
+    client = LemonadeClient(host=host, port=port, keep_alive=True)
+    return client.initialize(
+        agent=agent,
+        ctx_size=ctx_size,
+        auto_start=auto_start,
+        timeout=timeout,
+        verbose=verbose,
+        quiet=quiet,
+    )
+
+
+def print_agent_profiles():
+    """Print all available agent profiles and their requirements."""
+    print("\n📋 Available Agent Profiles:\n")
+    print(f"{'Agent':<12} {'Display Name':<20} {'Context Size':<15} {'Models'}")
+    print("-" * 80)
+
+    for name, profile in AGENT_PROFILES.items():
+        models = ", ".join(profile.models) if profile.models else "None"
+        print(
+            f"{name:<12} {profile.display_name:<20} {profile.min_ctx_size:<15} {models}"
+        )
+
+    print("\n📦 Available Models:\n")
+    print(f"{'Key':<20} {'Model ID':<40} {'Type'}")
+    print("-" * 80)
+
+    for key, model in MODELS.items():
+        print(f"{key:<20} {model.model_id:<40} {model.model_type.value}")
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+
+    # Show agent profiles
+    print_agent_profiles()
+    print("\n" + "=" * 80 + "\n")
+
+    # Use the new factory function instead of direct instantiation
+    client = create_lemonade_client(
+        model=DEFAULT_MODEL_NAME,
+        auto_start=True,
+        auto_load=True,
+        verbose=True,
+    )
+
+    try:
+        # Check server health
+        try:
+            health = client.health_check()
+            print(f"Server health: {health}")
+        except Exception as e:
+            print(f"Health check failed: {e}")
+
+        # List available models
+        try:
+            print("\nListing available models:")
+            models_list = client.list_models()
+            print(json.dumps(models_list, indent=2))
+        except Exception as e:
+            print(f"Failed to list models: {e}")
+
+        # Example: Using chat completions
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "What is the capital of France?"},
+        ]
+
+        try:
+            print("\nNon-streaming response:")
+            response = client.chat_completions(
+                model=DEFAULT_MODEL_NAME, messages=messages, timeout=30
+            )
+            print(response["choices"][0]["message"]["content"])
+        except Exception as e:
+            print(f"Chat completion failed: {e}")
+
+        try:
+            print("\nStreaming response:")
+            for chunk in client.chat_completions(
+                model=DEFAULT_MODEL_NAME, messages=messages, stream=True, timeout=30
+            ):
+                if "choices" in chunk and chunk["choices"][0].get("delta", {}).get(
+                    "content"
+                ):
+                    print(chunk["choices"][0]["delta"]["content"], end="", flush=True)
+        except Exception as e:
+            print(f"Streaming chat completion failed: {e}")
+
+        print("\n\nDone!")
+
+    except Exception as e:
+        print(f"Error occurred: {e}")
+    finally:
+        # Make sure to terminate the server when done
+        client.terminate_server()