amd-gaia 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amd_gaia-0.14.1.dist-info/METADATA +768 -0
- amd_gaia-0.14.1.dist-info/RECORD +800 -0
- amd_gaia-0.14.1.dist-info/WHEEL +5 -0
- amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
- amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
- amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
- gaia/__init__.py +2 -0
- gaia/agents/__init__.py +19 -0
- gaia/agents/base/__init__.py +9 -0
- gaia/agents/base/agent.py +2072 -0
- gaia/agents/base/api_agent.py +120 -0
- gaia/agents/base/console.py +1457 -0
- gaia/agents/base/mcp_agent.py +86 -0
- gaia/agents/base/tools.py +83 -0
- gaia/agents/blender/agent.py +556 -0
- gaia/agents/blender/agent_simple.py +135 -0
- gaia/agents/blender/app.py +211 -0
- gaia/agents/blender/app_simple.py +41 -0
- gaia/agents/blender/core/__init__.py +16 -0
- gaia/agents/blender/core/materials.py +506 -0
- gaia/agents/blender/core/objects.py +316 -0
- gaia/agents/blender/core/rendering.py +225 -0
- gaia/agents/blender/core/scene.py +220 -0
- gaia/agents/blender/core/view.py +146 -0
- gaia/agents/chat/__init__.py +9 -0
- gaia/agents/chat/agent.py +975 -0
- gaia/agents/chat/app.py +1058 -0
- gaia/agents/chat/session.py +508 -0
- gaia/agents/chat/tools/__init__.py +15 -0
- gaia/agents/chat/tools/file_tools.py +96 -0
- gaia/agents/chat/tools/rag_tools.py +1729 -0
- gaia/agents/chat/tools/shell_tools.py +436 -0
- gaia/agents/code/__init__.py +7 -0
- gaia/agents/code/agent.py +547 -0
- gaia/agents/code/app.py +266 -0
- gaia/agents/code/models.py +135 -0
- gaia/agents/code/orchestration/__init__.py +24 -0
- gaia/agents/code/orchestration/checklist_executor.py +1739 -0
- gaia/agents/code/orchestration/checklist_generator.py +709 -0
- gaia/agents/code/orchestration/factories/__init__.py +9 -0
- gaia/agents/code/orchestration/factories/base.py +63 -0
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
- gaia/agents/code/orchestration/factories/python_factory.py +106 -0
- gaia/agents/code/orchestration/orchestrator.py +610 -0
- gaia/agents/code/orchestration/project_analyzer.py +391 -0
- gaia/agents/code/orchestration/steps/__init__.py +67 -0
- gaia/agents/code/orchestration/steps/base.py +188 -0
- gaia/agents/code/orchestration/steps/error_handler.py +314 -0
- gaia/agents/code/orchestration/steps/nextjs.py +828 -0
- gaia/agents/code/orchestration/steps/python.py +307 -0
- gaia/agents/code/orchestration/template_catalog.py +463 -0
- gaia/agents/code/orchestration/workflows/__init__.py +14 -0
- gaia/agents/code/orchestration/workflows/base.py +80 -0
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
- gaia/agents/code/orchestration/workflows/python.py +94 -0
- gaia/agents/code/prompts/__init__.py +11 -0
- gaia/agents/code/prompts/base_prompt.py +77 -0
- gaia/agents/code/prompts/code_patterns.py +1925 -0
- gaia/agents/code/prompts/nextjs_prompt.py +40 -0
- gaia/agents/code/prompts/python_prompt.py +109 -0
- gaia/agents/code/schema_inference.py +365 -0
- gaia/agents/code/system_prompt.py +41 -0
- gaia/agents/code/tools/__init__.py +42 -0
- gaia/agents/code/tools/cli_tools.py +1138 -0
- gaia/agents/code/tools/code_formatting.py +319 -0
- gaia/agents/code/tools/code_tools.py +769 -0
- gaia/agents/code/tools/error_fixing.py +1347 -0
- gaia/agents/code/tools/external_tools.py +180 -0
- gaia/agents/code/tools/file_io.py +845 -0
- gaia/agents/code/tools/prisma_tools.py +190 -0
- gaia/agents/code/tools/project_management.py +1016 -0
- gaia/agents/code/tools/testing.py +321 -0
- gaia/agents/code/tools/typescript_tools.py +122 -0
- gaia/agents/code/tools/validation_parsing.py +461 -0
- gaia/agents/code/tools/validation_tools.py +803 -0
- gaia/agents/code/tools/web_dev_tools.py +1744 -0
- gaia/agents/code/validators/__init__.py +16 -0
- gaia/agents/code/validators/antipattern_checker.py +241 -0
- gaia/agents/code/validators/ast_analyzer.py +197 -0
- gaia/agents/code/validators/requirements_validator.py +145 -0
- gaia/agents/code/validators/syntax_validator.py +171 -0
- gaia/agents/docker/__init__.py +7 -0
- gaia/agents/docker/agent.py +642 -0
- gaia/agents/jira/__init__.py +11 -0
- gaia/agents/jira/agent.py +894 -0
- gaia/agents/jira/jql_templates.py +299 -0
- gaia/agents/routing/__init__.py +7 -0
- gaia/agents/routing/agent.py +512 -0
- gaia/agents/routing/system_prompt.py +75 -0
- gaia/api/__init__.py +23 -0
- gaia/api/agent_registry.py +238 -0
- gaia/api/app.py +305 -0
- gaia/api/openai_server.py +575 -0
- gaia/api/schemas.py +186 -0
- gaia/api/sse_handler.py +370 -0
- gaia/apps/__init__.py +4 -0
- gaia/apps/llm/__init__.py +6 -0
- gaia/apps/llm/app.py +169 -0
- gaia/apps/summarize/app.py +633 -0
- gaia/apps/summarize/html_viewer.py +133 -0
- gaia/apps/summarize/pdf_formatter.py +284 -0
- gaia/audio/__init__.py +2 -0
- gaia/audio/audio_client.py +439 -0
- gaia/audio/audio_recorder.py +269 -0
- gaia/audio/kokoro_tts.py +599 -0
- gaia/audio/whisper_asr.py +432 -0
- gaia/chat/__init__.py +16 -0
- gaia/chat/app.py +430 -0
- gaia/chat/prompts.py +522 -0
- gaia/chat/sdk.py +1200 -0
- gaia/cli.py +5621 -0
- gaia/eval/batch_experiment.py +2332 -0
- gaia/eval/claude.py +542 -0
- gaia/eval/config.py +37 -0
- gaia/eval/email_generator.py +512 -0
- gaia/eval/eval.py +3179 -0
- gaia/eval/groundtruth.py +1130 -0
- gaia/eval/transcript_generator.py +582 -0
- gaia/eval/webapp/README.md +168 -0
- gaia/eval/webapp/node_modules/.bin/mime +16 -0
- gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
- gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
- gaia/eval/webapp/node_modules/.package-lock.json +865 -0
- gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
- gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
- gaia/eval/webapp/node_modules/accepts/README.md +140 -0
- gaia/eval/webapp/node_modules/accepts/index.js +238 -0
- gaia/eval/webapp/node_modules/accepts/package.json +47 -0
- gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
- gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
- gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
- gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
- gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
- gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
- gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
- gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
- gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
- gaia/eval/webapp/node_modules/bytes/History.md +97 -0
- gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
- gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
- gaia/eval/webapp/node_modules/bytes/index.js +170 -0
- gaia/eval/webapp/node_modules/bytes/package.json +42 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
- gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
- gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
- gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
- gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
- gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
- gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
- gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
- gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
- gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
- gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-type/README.md +94 -0
- gaia/eval/webapp/node_modules/content-type/index.js +225 -0
- gaia/eval/webapp/node_modules/content-type/package.json +42 -0
- gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
- gaia/eval/webapp/node_modules/cookie/README.md +317 -0
- gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/cookie/index.js +334 -0
- gaia/eval/webapp/node_modules/cookie/package.json +44 -0
- gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
- gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
- gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
- gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
- gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
- gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
- gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
- gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
- gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
- gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
- gaia/eval/webapp/node_modules/debug/Makefile +50 -0
- gaia/eval/webapp/node_modules/debug/README.md +312 -0
- gaia/eval/webapp/node_modules/debug/component.json +19 -0
- gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
- gaia/eval/webapp/node_modules/debug/node.js +1 -0
- gaia/eval/webapp/node_modules/debug/package.json +49 -0
- gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
- gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
- gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
- gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
- gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
- gaia/eval/webapp/node_modules/depd/History.md +103 -0
- gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
- gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
- gaia/eval/webapp/node_modules/depd/index.js +538 -0
- gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
- gaia/eval/webapp/node_modules/depd/package.json +45 -0
- gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
- gaia/eval/webapp/node_modules/destroy/README.md +63 -0
- gaia/eval/webapp/node_modules/destroy/index.js +209 -0
- gaia/eval/webapp/node_modules/destroy/package.json +48 -0
- gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
- gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
- gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
- gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
- gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
- gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
- gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
- gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
- gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
- gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
- gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
- gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
- gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
- gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
- gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
- gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
- gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
- gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
- gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
- gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
- gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
- gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
- gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
- gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
- gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
- gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
- gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
- gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
- gaia/eval/webapp/node_modules/etag/README.md +159 -0
- gaia/eval/webapp/node_modules/etag/index.js +131 -0
- gaia/eval/webapp/node_modules/etag/package.json +47 -0
- gaia/eval/webapp/node_modules/express/History.md +3656 -0
- gaia/eval/webapp/node_modules/express/LICENSE +24 -0
- gaia/eval/webapp/node_modules/express/Readme.md +260 -0
- gaia/eval/webapp/node_modules/express/index.js +11 -0
- gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
- gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
- gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
- gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
- gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
- gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
- gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
- gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
- gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
- gaia/eval/webapp/node_modules/express/package.json +102 -0
- gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
- gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
- gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
- gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
- gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
- gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
- gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
- gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
- gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
- gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
- gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
- gaia/eval/webapp/node_modules/fresh/README.md +119 -0
- gaia/eval/webapp/node_modules/fresh/index.js +137 -0
- gaia/eval/webapp/node_modules/fresh/package.json +46 -0
- gaia/eval/webapp/node_modules/fs/README.md +9 -0
- gaia/eval/webapp/node_modules/fs/package.json +20 -0
- gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
- gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
- gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
- gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
- gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
- gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
- gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
- gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
- gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
- gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
- gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
- gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
- gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
- gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
- gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
- gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
- gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
- gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
- gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
- gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
- gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
- gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
- gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
- gaia/eval/webapp/node_modules/gopd/README.md +40 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
- gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/gopd/index.js +15 -0
- gaia/eval/webapp/node_modules/gopd/package.json +77 -0
- gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
- gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
- gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
- gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
- gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
- gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
- gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
- gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
- gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
- gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
- gaia/eval/webapp/node_modules/hasown/README.md +40 -0
- gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/hasown/index.js +8 -0
- gaia/eval/webapp/node_modules/hasown/package.json +92 -0
- gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
- gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
- gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
- gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
- gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
- gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
- gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
- gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
- gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
- gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
- gaia/eval/webapp/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
- gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
- gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
- gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
- gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
- gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
- gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
- gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
- gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
- gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
- gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
- gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
- gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
- gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
- gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
- gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
- gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
- gaia/eval/webapp/node_modules/methods/README.md +51 -0
- gaia/eval/webapp/node_modules/methods/index.js +69 -0
- gaia/eval/webapp/node_modules/methods/package.json +36 -0
- gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
- gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
- gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
- gaia/eval/webapp/node_modules/mime/README.md +90 -0
- gaia/eval/webapp/node_modules/mime/cli.js +8 -0
- gaia/eval/webapp/node_modules/mime/mime.js +108 -0
- gaia/eval/webapp/node_modules/mime/package.json +44 -0
- gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
- gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
- gaia/eval/webapp/node_modules/mime/types.json +1 -0
- gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
- gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
- gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
- gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
- gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
- gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
- gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
- gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
- gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
- gaia/eval/webapp/node_modules/ms/index.js +152 -0
- gaia/eval/webapp/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/ms/package.json +37 -0
- gaia/eval/webapp/node_modules/ms/readme.md +51 -0
- gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
- gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
- gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
- gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
- gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
- gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
- gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
- gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
- gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
- gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
- gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
- gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
- gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
- gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
- gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
- gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
- gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
- gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
- gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
- gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
- gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
- gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
- gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
- gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
- gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
- gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
- gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
- gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
- gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
- gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
- gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
- gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
- gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
- gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
- gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
- gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
- gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
- gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
- gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
- gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
- gaia/eval/webapp/node_modules/path/.npmignore +1 -0
- gaia/eval/webapp/node_modules/path/LICENSE +18 -0
- gaia/eval/webapp/node_modules/path/README.md +15 -0
- gaia/eval/webapp/node_modules/path/package.json +24 -0
- gaia/eval/webapp/node_modules/path/path.js +628 -0
- gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
- gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
- gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
- gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
- gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/process/LICENSE +22 -0
- gaia/eval/webapp/node_modules/process/README.md +26 -0
- gaia/eval/webapp/node_modules/process/browser.js +184 -0
- gaia/eval/webapp/node_modules/process/index.js +2 -0
- gaia/eval/webapp/node_modules/process/package.json +27 -0
- gaia/eval/webapp/node_modules/process/test.js +199 -0
- gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
- gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
- gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
- gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
- gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
- gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
- gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
- gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
- gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
- gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
- gaia/eval/webapp/node_modules/qs/README.md +709 -0
- gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
- gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
- gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
- gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
- gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
- gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
- gaia/eval/webapp/node_modules/qs/package.json +91 -0
- gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
- gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
- gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
- gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
- gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
- gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
- gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
- gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
- gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
- gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
- gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
- gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
- gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
- gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
- gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
- gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
- gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
- gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
- gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
- gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
- gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
- gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
- gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
- gaia/eval/webapp/node_modules/send/LICENSE +23 -0
- gaia/eval/webapp/node_modules/send/README.md +327 -0
- gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/send/index.js +1142 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
- gaia/eval/webapp/node_modules/send/package.json +62 -0
- gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
- gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
- gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
- gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
- gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
- gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
- gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
- gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
- gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
- gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
- gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
- gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
- gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
- gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
- gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
- gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
- gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
- gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
- gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
- gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
- gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
- gaia/eval/webapp/node_modules/statuses/README.md +136 -0
- gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
- gaia/eval/webapp/node_modules/statuses/index.js +146 -0
- gaia/eval/webapp/node_modules/statuses/package.json +49 -0
- gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
- gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
- gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
- gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
- gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
- gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
- gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
- gaia/eval/webapp/node_modules/type-is/README.md +170 -0
- gaia/eval/webapp/node_modules/type-is/index.js +266 -0
- gaia/eval/webapp/node_modules/type-is/package.json +45 -0
- gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
- gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
- gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
- gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
- gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
- gaia/eval/webapp/node_modules/util/LICENSE +18 -0
- gaia/eval/webapp/node_modules/util/README.md +15 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/util/package.json +35 -0
- gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
- gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
- gaia/eval/webapp/node_modules/util/util.js +586 -0
- gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
- gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
- gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
- gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
- gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
- gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
- gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
- gaia/eval/webapp/node_modules/vary/README.md +101 -0
- gaia/eval/webapp/node_modules/vary/index.js +149 -0
- gaia/eval/webapp/node_modules/vary/package.json +43 -0
- gaia/eval/webapp/package-lock.json +875 -0
- gaia/eval/webapp/package.json +21 -0
- gaia/eval/webapp/public/app.js +3403 -0
- gaia/eval/webapp/public/index.html +88 -0
- gaia/eval/webapp/public/styles.css +3661 -0
- gaia/eval/webapp/server.js +416 -0
- gaia/eval/webapp/test-setup.js +73 -0
- gaia/llm/__init__.py +2 -0
- gaia/llm/lemonade_client.py +3083 -0
- gaia/llm/lemonade_manager.py +269 -0
- gaia/llm/llm_client.py +729 -0
- gaia/llm/vlm_client.py +307 -0
- gaia/logger.py +189 -0
- gaia/mcp/agent_mcp_server.py +245 -0
- gaia/mcp/blender_mcp_client.py +138 -0
- gaia/mcp/blender_mcp_server.py +648 -0
- gaia/mcp/context7_cache.py +332 -0
- gaia/mcp/external_services.py +518 -0
- gaia/mcp/mcp_bridge.py +550 -0
- gaia/mcp/servers/__init__.py +6 -0
- gaia/mcp/servers/docker_mcp.py +83 -0
- gaia/rag/__init__.py +10 -0
- gaia/rag/app.py +293 -0
- gaia/rag/demo.py +304 -0
- gaia/rag/pdf_utils.py +235 -0
- gaia/rag/sdk.py +2194 -0
- gaia/security.py +163 -0
- gaia/talk/app.py +289 -0
- gaia/talk/sdk.py +538 -0
- gaia/util.py +46 -0
- gaia/version.py +100 -0
|
@@ -0,0 +1,2332 @@
|
|
|
1
|
+
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Tuple
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from gaia.chat.prompts import Prompts
|
|
15
|
+
from gaia.eval.claude import ClaudeClient
|
|
16
|
+
from gaia.eval.config import DEFAULT_CLAUDE_MODEL
|
|
17
|
+
from gaia.llm.lemonade_client import LemonadeClient
|
|
18
|
+
from gaia.logger import get_logger
|
|
19
|
+
|
|
20
|
+
# Import PDF reader
|
|
21
|
+
try:
|
|
22
|
+
from pypdf import PdfReader
|
|
23
|
+
except ImportError:
|
|
24
|
+
PdfReader = None
|
|
25
|
+
|
|
26
|
+
# Experiment configuration constants
# NOTE(review): neither constant is referenced in this chunk; presumably used
# by temperature-sweep logic for "creative" experiment variants — confirm.
CREATIVE_TEMPERATURE_MAX = 0.7
CREATIVE_TEMPERATURE_INCREMENT = 0.3
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def should_use_chat_template(task_type: str) -> bool:
    """
    Decide whether chat-template formatting applies to a task.

    Chat templates are reserved for genuine conversation/QA exchanges;
    completion-style tasks such as summarization are sent as plain prompts.

    Args:
        task_type: Type of task ('chat', 'qa', 'summarization', etc.)

    Returns:
        True if chat template should be used, False otherwise
    """
    chat_style_tasks = {"chat", "qa"}
    return task_type in chat_style_tasks
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def format_prompt_with_template(
    model: str,
    system_prompt: str,
    user_content: str,
    document_content: str = "",
    use_chat_template: bool = False,
) -> str:
    """
    Build the final prompt, via a model-specific chat template or plainly.

    Args:
        model: Model name/path
        system_prompt: The system instruction
        user_content: The user's query/content
        document_content: Optional document/transcript context to include
        use_chat_template: Whether to use chat template formatting (for QA/chat tasks)

    Returns:
        Formatted prompt
    """
    if not use_chat_template:
        # Plain concatenation for completion-style tasks (e.g. summarization).
        if document_content:
            return f"{system_prompt}\n\nDocument Content:\n{document_content}\n\n{user_content}"
        return f"{system_prompt}\n\n{user_content}"

    # Chat/QA path: fold any document context into the system prompt, then
    # delegate to the model-specific chat formatting.
    enhanced_system_prompt = system_prompt
    if document_content:
        enhanced_system_prompt = (
            f"{system_prompt}\n\nContext Document:\n{document_content}"
        )

    # Prompts.format_chat_history expects "role: content" history entries.
    return Prompts.format_chat_history(
        model=model,
        chat_history=[f"user: {user_content}"],
        system_prompt=enhanced_system_prompt,
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def extract_thinking_from_response(response_text: str) -> Dict[str, Any]:
    """
    Split a model response into its visible answer and optional reasoning.

    Some models emit chain-of-thought wrapped in <think>...</think> tags
    ahead of the final answer; this separates the two parts.

    Args:
        response_text: The raw model response

    Returns:
        Dict with 'response' (final answer) and 'thinking' (reasoning process, or None)
    """
    if not response_text:
        return {"response": "", "thinking": None}

    pattern = r"<think>(.*?)</think>"
    found = re.search(pattern, response_text, flags=re.DOTALL)
    if found is None:
        # No reasoning tags present; the whole text is the answer.
        return {"response": response_text.strip(), "thinking": None}

    # Remove the <think> blocks to leave only the final answer text.
    answer = re.sub(pattern, "", response_text, flags=re.DOTALL).strip()
    return {"response": answer, "thinking": found.group(1).strip()}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class ExperimentConfig:
    """Configuration for a single experiment.

    `parameters` carries extra, llm-type-specific keyword options; a missing
    value is normalized to an empty dict after construction.
    """

    name: str
    llm_type: str  # "claude" or "lemonade"
    model: str
    system_prompt: str
    experiment_type: str = "qa"  # "qa" or "summarization"
    max_tokens: int = 512
    temperature: float = 0.7
    parameters: Dict[str, Any] = None

    def __post_init__(self):
        # Normalize a missing parameter dict, then validate experiment type.
        self.parameters = {} if self.parameters is None else self.parameters
        if self.experiment_type in ("qa", "summarization"):
            return
        raise ValueError(
            f"experiment_type must be 'qa' or 'summarization', got: {self.experiment_type}"
        )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class BatchExperimentRunner:
|
|
140
|
+
"""Run batch experiments with different LLM configurations on transcript data.
|
|
141
|
+
|
|
142
|
+
Summarization experiments make independent LLM calls for each component
|
|
143
|
+
(executive summary, detailed summary, action items, etc.) to produce
|
|
144
|
+
natural, focused outputs without complex JSON formatting.
|
|
145
|
+
"""
|
|
146
|
+
|
|
147
|
+
def __init__(self, config_file: str):
|
|
148
|
+
self.log = get_logger(__name__)
|
|
149
|
+
self.config_file = config_file
|
|
150
|
+
self.experiments = []
|
|
151
|
+
self.load_config()
|
|
152
|
+
|
|
153
|
+
def _extract_text_from_pdf(self, pdf_path: str) -> str:
|
|
154
|
+
"""Extract text from PDF file using local PDF library."""
|
|
155
|
+
if PdfReader is None:
|
|
156
|
+
raise ImportError(
|
|
157
|
+
"PDF reading library not found. Please install pypdf:\n"
|
|
158
|
+
" pip install pypdf"
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
try:
|
|
162
|
+
reader = PdfReader(pdf_path)
|
|
163
|
+
total_pages = len(reader.pages)
|
|
164
|
+
self.log.info(
|
|
165
|
+
f"📄 Extracting text from {total_pages} pages of {pdf_path}..."
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
text = ""
|
|
169
|
+
for i, page in enumerate(reader.pages, 1):
|
|
170
|
+
# Show progress for large PDFs
|
|
171
|
+
if i % 10 == 0 or i == total_pages:
|
|
172
|
+
self.log.debug(f" Processing page {i}/{total_pages}...")
|
|
173
|
+
page_text = page.extract_text()
|
|
174
|
+
if page_text:
|
|
175
|
+
text += page_text + "\n"
|
|
176
|
+
|
|
177
|
+
extracted_text = text.strip()
|
|
178
|
+
self.log.info(f"📝 Extracted {len(extracted_text):,} characters from PDF")
|
|
179
|
+
return extracted_text
|
|
180
|
+
|
|
181
|
+
except Exception as e:
|
|
182
|
+
self.log.error(f"Error reading PDF {pdf_path}: {e}")
|
|
183
|
+
raise
|
|
184
|
+
|
|
185
|
+
def load_config(self):
|
|
186
|
+
"""Load experiment configuration from JSON file."""
|
|
187
|
+
try:
|
|
188
|
+
with open(self.config_file, "r", encoding="utf-8") as f:
|
|
189
|
+
config_data = json.load(f)
|
|
190
|
+
|
|
191
|
+
# Validate config structure
|
|
192
|
+
if "experiments" not in config_data:
|
|
193
|
+
raise ValueError("Configuration file must contain 'experiments' array")
|
|
194
|
+
|
|
195
|
+
# Parse experiments
|
|
196
|
+
for exp_data in config_data["experiments"]:
|
|
197
|
+
experiment = ExperimentConfig(
|
|
198
|
+
name=exp_data["name"],
|
|
199
|
+
llm_type=exp_data["llm_type"],
|
|
200
|
+
model=exp_data["model"],
|
|
201
|
+
system_prompt=exp_data["system_prompt"],
|
|
202
|
+
experiment_type=exp_data.get("experiment_type", "qa"),
|
|
203
|
+
max_tokens=exp_data.get("max_tokens", 512),
|
|
204
|
+
temperature=exp_data.get("temperature", 0.7),
|
|
205
|
+
parameters=exp_data.get("parameters", {}),
|
|
206
|
+
)
|
|
207
|
+
self.experiments.append(experiment)
|
|
208
|
+
|
|
209
|
+
self.log.info(f"Loaded {len(self.experiments)} experiments from config")
|
|
210
|
+
|
|
211
|
+
except Exception as e:
|
|
212
|
+
self.log.error(f"Error loading config file: {e}")
|
|
213
|
+
raise
|
|
214
|
+
|
|
215
|
+
def create_llm_client(self, experiment: ExperimentConfig):
|
|
216
|
+
"""Create appropriate LLM client based on experiment config."""
|
|
217
|
+
if experiment.llm_type.lower() == "claude":
|
|
218
|
+
return ClaudeClient(
|
|
219
|
+
model=experiment.model, max_tokens=experiment.max_tokens
|
|
220
|
+
)
|
|
221
|
+
elif experiment.llm_type.lower() == "lemonade":
|
|
222
|
+
# Filter out non-LLM client parameters before passing to client constructor
|
|
223
|
+
# Parameters like 'stop', 'combined_prompt' are for completions API, not client init
|
|
224
|
+
llm_params = {
|
|
225
|
+
k: v
|
|
226
|
+
for k, v in experiment.parameters.items()
|
|
227
|
+
if k not in ["combined_prompt", "stop"]
|
|
228
|
+
}
|
|
229
|
+
return LemonadeClient(model=experiment.model, verbose=False, **llm_params)
|
|
230
|
+
else:
|
|
231
|
+
raise ValueError(f"Unsupported LLM type: {experiment.llm_type}")
|
|
232
|
+
|
|
233
|
+
def process_question_claude(
|
|
234
|
+
self,
|
|
235
|
+
client: ClaudeClient,
|
|
236
|
+
question: str,
|
|
237
|
+
system_prompt: str,
|
|
238
|
+
document_content: str = "",
|
|
239
|
+
) -> Dict:
|
|
240
|
+
"""Process a question using Claude client."""
|
|
241
|
+
try:
|
|
242
|
+
if document_content:
|
|
243
|
+
# Include document content in the prompt
|
|
244
|
+
prompt = f"{system_prompt}\n\nDocument Content:\n{document_content}\n\nQuestion: {question}\n\nAnswer:"
|
|
245
|
+
else:
|
|
246
|
+
prompt = f"{system_prompt}\n\nQuestion: {question}\n\nAnswer:"
|
|
247
|
+
response_data = client.get_completion_with_usage(prompt)
|
|
248
|
+
|
|
249
|
+
# Extract response text
|
|
250
|
+
response = response_data["content"]
|
|
251
|
+
if isinstance(response, list):
|
|
252
|
+
response_text = (
|
|
253
|
+
response[0].text
|
|
254
|
+
if hasattr(response[0], "text")
|
|
255
|
+
else str(response[0])
|
|
256
|
+
)
|
|
257
|
+
else:
|
|
258
|
+
response_text = (
|
|
259
|
+
response.text if hasattr(response, "text") else str(response)
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
return {
|
|
263
|
+
"response": response_text.strip(),
|
|
264
|
+
"usage": response_data["usage"],
|
|
265
|
+
"cost": response_data["cost"],
|
|
266
|
+
"error": None,
|
|
267
|
+
}
|
|
268
|
+
except Exception as e:
|
|
269
|
+
self.log.error(f"Error processing question with Claude: {e}")
|
|
270
|
+
return {
|
|
271
|
+
"response": f"ERROR: {str(e)}",
|
|
272
|
+
"usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
|
|
273
|
+
"cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
|
|
274
|
+
"error": str(e),
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
def process_question_lemonade(
|
|
278
|
+
self,
|
|
279
|
+
client: LemonadeClient,
|
|
280
|
+
question: str,
|
|
281
|
+
system_prompt: str,
|
|
282
|
+
max_tokens: int,
|
|
283
|
+
temperature: float,
|
|
284
|
+
document_content: str = "",
|
|
285
|
+
) -> Dict:
|
|
286
|
+
"""Process a question using Lemonade client."""
|
|
287
|
+
try:
|
|
288
|
+
# Format prompt for QA (uses ChatML for supported models)
|
|
289
|
+
# Document content is passed separately and added to system prompt
|
|
290
|
+
formatted_prompt = format_prompt_with_template(
|
|
291
|
+
model=client.model,
|
|
292
|
+
system_prompt=system_prompt,
|
|
293
|
+
user_content=question,
|
|
294
|
+
document_content=document_content,
|
|
295
|
+
use_chat_template=True, # Use chat template for QA tasks
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
# Use completions method with the client's loaded model
|
|
299
|
+
response_data = client.completions(
|
|
300
|
+
model=client.model, # Use model from experiment config
|
|
301
|
+
prompt=formatted_prompt,
|
|
302
|
+
max_tokens=max_tokens,
|
|
303
|
+
temperature=temperature,
|
|
304
|
+
stream=False,
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
# Extract text from the response
|
|
308
|
+
response_text = ""
|
|
309
|
+
if "choices" in response_data and response_data["choices"]:
|
|
310
|
+
response_text = response_data["choices"][0].get("text", "")
|
|
311
|
+
|
|
312
|
+
# Extract thinking tokens if present
|
|
313
|
+
extracted = extract_thinking_from_response(response_text)
|
|
314
|
+
|
|
315
|
+
# Get token statistics from Lemonade
|
|
316
|
+
try:
|
|
317
|
+
stats = client.get_stats()
|
|
318
|
+
input_tokens = stats.get("input_tokens", 0) if stats else 0
|
|
319
|
+
output_tokens = stats.get("output_tokens", 0) if stats else 0
|
|
320
|
+
total_tokens = input_tokens + output_tokens
|
|
321
|
+
except Exception as e:
|
|
322
|
+
self.log.warning(f"Failed to get stats from Lemonade: {e}")
|
|
323
|
+
input_tokens = output_tokens = total_tokens = 0
|
|
324
|
+
|
|
325
|
+
result = {
|
|
326
|
+
"response": extracted["response"],
|
|
327
|
+
"usage": {
|
|
328
|
+
"input_tokens": input_tokens,
|
|
329
|
+
"output_tokens": output_tokens,
|
|
330
|
+
"total_tokens": total_tokens,
|
|
331
|
+
},
|
|
332
|
+
"cost": {
|
|
333
|
+
"input_cost": 0.0,
|
|
334
|
+
"output_cost": 0.0,
|
|
335
|
+
"total_cost": 0.0,
|
|
336
|
+
}, # Local inference has no cost
|
|
337
|
+
"error": None,
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
# Add thinking tokens if present
|
|
341
|
+
if extracted["thinking"]:
|
|
342
|
+
result["thinking"] = extracted["thinking"]
|
|
343
|
+
|
|
344
|
+
return result
|
|
345
|
+
|
|
346
|
+
except Exception as e:
|
|
347
|
+
self.log.error(f"Error processing question with Lemonade: {e}")
|
|
348
|
+
return {
|
|
349
|
+
"response": f"ERROR: {str(e)}",
|
|
350
|
+
"usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
|
|
351
|
+
"cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
|
|
352
|
+
"error": str(e),
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
def _get_summary_prompts(self) -> Dict[str, str]:
|
|
356
|
+
"""Generate individual user prompts for each summary component.
|
|
357
|
+
|
|
358
|
+
Note: System prompt is passed separately when formatting with chat template.
|
|
359
|
+
These are just the user-facing questions/tasks.
|
|
360
|
+
"""
|
|
361
|
+
return {
|
|
362
|
+
"executive_summary": "Provide a brief executive summary (2-3 sentences) of the key outcomes and decisions from this transcript.",
|
|
363
|
+
"detailed_summary": "Provide a detailed summary of the transcript, covering all major topics, discussions, and outcomes in paragraph form.",
|
|
364
|
+
"action_items": "List the specific action items that were assigned during this meeting. Include who is responsible for each item when mentioned. Provide as a simple list.",
|
|
365
|
+
"key_decisions": "List the key decisions that were made during this meeting. Focus on concrete decisions and outcomes. Provide as a simple list.",
|
|
366
|
+
"participants": "List the participants mentioned in this transcript. Include their roles or titles when available. Provide as a simple list.",
|
|
367
|
+
"topics_discussed": "List the main topics and subjects that were discussed in this meeting. Provide as a simple list.",
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
def process_summarization_claude(
    self,
    client: ClaudeClient,
    transcript: str,
    system_prompt: str,
    combined_prompt: bool = False,
) -> Dict:
    """Process summarization by making independent or combined calls for each component.

    Args:
        client: Claude client exposing ``get_completion_with_usage``; its return
            value is indexed for "content", "usage", and "cost".
        transcript: Raw transcript text embedded into each prompt.
        system_prompt: Instructions prepended to every request.
        combined_prompt: When True, one request asks for all components and the
            response is split back into sections by header markers; when False,
            one request is made per component.

    Returns:
        Dict with "response" (component name -> summary text, or an
        "ERROR: ..." string on total failure), "usage", "cost", and "error"
        (semicolon-joined per-component errors, or None).
    """
    try:
        summary_prompts = self._get_summary_prompts()
        results = {}
        total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
        errors = []

        if combined_prompt:
            # Make a single call with all components
            self.log.info(
                f"Summarizing transcript with 1 combined model call for: {', '.join(summary_prompts.keys())}"
            )

            # Build combined prompt: system prompt, then every per-component
            # task under a **NAME**: header, then the transcript.
            combined_request = f"{system_prompt}\n\nPlease provide the following summaries for the transcript:\n\n"
            for component, user_prompt in summary_prompts.items():
                combined_request += f"**{component.upper()}**:\n{user_prompt}\n\n"
            combined_request += f"\nTranscript:\n{transcript}\n\nPlease structure your response with clear headers for each section."

            response_data = client.get_completion_with_usage(combined_request)

            # Extract response text. The content may be a list of blocks or a
            # single object; either way prefer a ``.text`` attribute.
            response = response_data["content"]
            if isinstance(response, list):
                response_text = (
                    response[0].text
                    if hasattr(response[0], "text")
                    else str(response[0])
                )
            else:
                response_text = (
                    response.text if hasattr(response, "text") else str(response)
                )

            # Parse response into components by locating each component's
            # header and cutting at the nearest following header of any
            # *other* component.
            for component in summary_prompts.keys():
                # Try to extract each component from the combined response.
                # Several header styles are tolerated (bold, plain, markdown).
                component_upper = component.upper()
                start_markers = [
                    f"**{component_upper}**:",
                    f"{component_upper}:",
                    f"# {component_upper}",
                    f"## {component_upper}",
                ]

                section_text = ""
                for marker in start_markers:
                    if marker in response_text:
                        start_idx = response_text.find(marker) + len(marker)
                        # Find the next section or end
                        end_idx = len(response_text)
                        for other_component in summary_prompts.keys():
                            if other_component == component:
                                continue
                            other_upper = other_component.upper()
                            for other_marker in [
                                f"**{other_upper}**:",
                                f"{other_upper}:",
                                f"# {other_upper}",
                                f"## {other_upper}",
                            ]:
                                idx = response_text.find(other_marker, start_idx)
                                if idx != -1 and idx < end_idx:
                                    end_idx = idx
                        section_text = response_text[start_idx:end_idx].strip()
                        break

                # Fall back to the whole response when no header matched, so
                # the component is never silently empty.
                results[component] = (
                    section_text if section_text else response_text.strip()
                )

            # Use combined usage and cost (single call, no accumulation needed)
            if response_data["usage"]:
                total_usage = response_data["usage"]
            if response_data["cost"]:
                total_cost = response_data["cost"]

        else:
            # Original behavior: independent calls, one per component.
            self.log.info(
                f"Summarizing transcript with {len(summary_prompts)} independent model calls: {', '.join(summary_prompts.keys())}"
            )

            for component, user_prompt in summary_prompts.items():
                try:
                    # Create full prompt with system prompt, user prompt, and transcript
                    full_prompt = f"{system_prompt}\n\n{user_prompt}\n\nTranscript:\n{transcript}\n\nResponse:"

                    response_data = client.get_completion_with_usage(full_prompt)

                    # Extract response text (same list/object handling as above)
                    response = response_data["content"]
                    if isinstance(response, list):
                        response_text = (
                            response[0].text
                            if hasattr(response[0], "text")
                            else str(response[0])
                        )
                    else:
                        response_text = (
                            response.text
                            if hasattr(response, "text")
                            else str(response)
                        )

                    results[component] = response_text.strip()

                    # Accumulate usage and cost across the per-component calls
                    if response_data["usage"]:
                        for key in total_usage:
                            total_usage[key] += response_data["usage"].get(key, 0)
                    if response_data["cost"]:
                        for key in total_cost:
                            total_cost[key] += response_data["cost"].get(key, 0.0)

                    # Small delay between component calls to avoid rate limiting
                    time.sleep(0.5)

                except Exception as e:
                    # A single failed component does not abort the run; it is
                    # recorded both in its slot and in the aggregate error list.
                    self.log.error(f"Error processing {component} with Claude: {e}")
                    results[component] = f"ERROR: {str(e)}"
                    errors.append(f"{component}: {str(e)}")

        return {
            "response": results,
            "usage": total_usage,
            "cost": total_cost,
            "error": "; ".join(errors) if errors else None,
        }
    except Exception as e:
        # Total failure (e.g. prompt building or combined call raised):
        # return the same dict shape with zeroed usage/cost.
        self.log.error(f"Error in independent summarization with Claude: {e}")
        return {
            "response": f"ERROR: {str(e)}",
            "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
            "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
            "error": str(e),
        }
|
|
515
|
+
|
|
516
|
+
def process_summarization_lemonade(
    self,
    client: LemonadeClient,
    transcript: str,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    combined_prompt: bool = False,
    extra_params: Dict[str, Any] = None,
) -> Dict:
    """Process summarization by making independent or combined calls for each component.

    Local (Lemonade) counterpart of the Claude summarizer: prompts are built
    via ``format_prompt_with_template``, token counts come from
    ``client.get_stats()``, and cost is always zero.

    Args:
        client: Lemonade client exposing ``completions`` and ``get_stats``.
        transcript: Raw transcript text, passed as document content.
        system_prompt: Instructions passed to the chat template.
        max_tokens: Completion token limit per call.
        temperature: Sampling temperature per call.
        combined_prompt: When True, one request asks for all components and
            the response is split by header markers; otherwise one request is
            made per component.
        extra_params: Extra keyword args forwarded to ``client.completions``
            (e.g. stop sequences); defaults to an empty dict.

    Returns:
        Dict with "response" (component name -> summary text, or an
        "ERROR: ..." string on total failure), "usage", "cost" (always zero),
        "error", and optionally "thinking" (combined mode only).
    """
    try:
        summary_prompts = self._get_summary_prompts()
        results = {}
        total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
        errors = []

        # Prepare extra parameters (like stop sequences)
        if extra_params is None:
            extra_params = {}

        if combined_prompt:
            # Make a single call with all components
            self.log.info(
                f"Summarizing transcript with 1 combined model call for: {', '.join(summary_prompts.keys())}"
            )

            # Build user request for all components (the system prompt is
            # applied separately by the chat template below)
            user_request = (
                "Please provide the following summaries for the transcript:\n\n"
            )
            for component, user_prompt in summary_prompts.items():
                user_request += f"**{component.upper()}**:\n{user_prompt}\n\n"
            user_request += "\nPlease structure your response with clear headers for each section."

            # Format prompt for summarization using chat template
            formatted_prompt = format_prompt_with_template(
                model=client.model,
                system_prompt=system_prompt,
                user_content=user_request,
                document_content=transcript,  # Pass transcript as context
                use_chat_template=True,  # Use chat template for summarization
            )

            response_data = client.completions(
                model=client.model,
                prompt=formatted_prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=False,
                **extra_params,  # Pass stop sequences and other params
            )

            # Extract text from the (OpenAI-style) completion response
            response_text = ""
            if "choices" in response_data and response_data["choices"]:
                response_text = response_data["choices"][0].get("text", "")

            # Extract thinking tokens if present; only the cleaned response is
            # parsed into sections
            extracted = extract_thinking_from_response(response_text)
            response_text = extracted["response"]
            thinking_content = extracted["thinking"]

            # Get token statistics from Lemonade; fall back to zeros when the
            # stats endpoint fails
            try:
                stats = client.get_stats()
                input_tokens = stats.get("input_tokens", 0) if stats else 0
                output_tokens = stats.get("output_tokens", 0) if stats else 0
                total_tokens = input_tokens + output_tokens
                total_usage = {
                    "input_tokens": input_tokens,
                    "output_tokens": output_tokens,
                    "total_tokens": total_tokens,
                }
            except Exception as e:
                self.log.warning(f"Failed to get stats from Lemonade: {e}")
                total_usage = {
                    "input_tokens": 0,
                    "output_tokens": 0,
                    "total_tokens": 0,
                }

            # Parse response into components by locating each component's
            # header and cutting at the nearest following header of any
            # *other* component
            for component in summary_prompts.keys():
                # Try to extract each component from the combined response.
                # Several header styles are tolerated (bold, plain, markdown).
                component_upper = component.upper()
                start_markers = [
                    f"**{component_upper}**:",
                    f"{component_upper}:",
                    f"# {component_upper}",
                    f"## {component_upper}",
                ]

                section_text = ""
                for marker in start_markers:
                    if marker in response_text:
                        start_idx = response_text.find(marker) + len(marker)
                        # Find the next section or end
                        end_idx = len(response_text)
                        for other_component in summary_prompts.keys():
                            if other_component == component:
                                continue
                            other_upper = other_component.upper()
                            for other_marker in [
                                f"**{other_upper}**:",
                                f"{other_upper}:",
                                f"# {other_upper}",
                                f"## {other_upper}",
                            ]:
                                idx = response_text.find(other_marker, start_idx)
                                if idx != -1 and idx < end_idx:
                                    end_idx = idx
                        section_text = response_text[start_idx:end_idx].strip()
                        break

                # Fall back to the whole response when no header matched
                results[component] = (
                    section_text if section_text else response_text.strip()
                )

            # Total usage already calculated above, cost is always 0 for local
            total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}

            # Store thinking content if present (for combined mode). The
            # "_thinking" key is popped back out below before returning.
            if thinking_content:
                results["_thinking"] = thinking_content

        else:
            # Original behavior: independent calls, one per component.
            # NOTE(review): per-component thinking tokens are extracted but
            # discarded here — only the cleaned response text is kept.
            self.log.info(
                f"Summarizing transcript with {len(summary_prompts)} independent model calls: {', '.join(summary_prompts.keys())}"
            )

            for component, user_prompt in summary_prompts.items():
                try:
                    # Format using chat template with separate system prompt and user prompt
                    formatted_prompt = format_prompt_with_template(
                        model=client.model,
                        system_prompt=system_prompt,
                        user_content=user_prompt,
                        document_content=transcript,
                        use_chat_template=True,  # Use chat template for summarization
                    )

                    response_data = client.completions(
                        model=client.model,
                        prompt=formatted_prompt,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        stream=False,
                        **extra_params,  # Pass stop sequences and other params
                    )

                    # Extract text from the response
                    response_text = ""
                    if "choices" in response_data and response_data["choices"]:
                        response_text = response_data["choices"][0].get("text", "")

                    # Extract thinking tokens if present
                    extracted = extract_thinking_from_response(response_text)

                    results[component] = extracted["response"]

                    # Get token statistics from Lemonade and accumulate across
                    # the per-component calls. Stats failures only warn; the
                    # component's text is already stored.
                    try:
                        stats = client.get_stats()
                        if stats:
                            total_usage["input_tokens"] += stats.get(
                                "input_tokens", 0
                            )
                            total_usage["output_tokens"] += stats.get(
                                "output_tokens", 0
                            )
                            total_usage["total_tokens"] += stats.get(
                                "input_tokens", 0
                            ) + stats.get("output_tokens", 0)
                    except Exception as e:
                        self.log.warning(f"Failed to get stats from Lemonade: {e}")

                    # Small delay between component calls to avoid rate limiting
                    time.sleep(0.5)

                except Exception as e:
                    # A single failed component does not abort the run
                    self.log.error(
                        f"Error processing {component} with Lemonade: {e}"
                    )
                    results[component] = f"ERROR: {str(e)}"
                    errors.append(f"{component}: {str(e)}")

        result_dict = {
            "response": results,
            "usage": total_usage,
            "cost": total_cost,
            "error": "; ".join(errors) if errors else None,
        }

        # Add thinking if present (stored with key "_thinking" in results dict)
        if "_thinking" in results:
            result_dict["thinking"] = results.pop("_thinking")

        return result_dict

    except Exception as e:
        # Total failure: return the same dict shape with zeroed usage/cost.
        self.log.error(f"Error in independent summarization with Lemonade: {e}")
        return {
            "response": f"ERROR: {str(e)}",
            "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
            "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
            "error": str(e),
        }
|
|
726
|
+
|
|
727
|
+
def check_experiment_exists(
    self, experiment: ExperimentConfig, output_dir: str
) -> bool:
    """Check if an experiment file already exists in the output directory.

    Args:
        experiment: The experiment configuration (its ``name`` determines
            the output filename).
        output_dir: The output directory path.

    Returns:
        True if the consolidated experiment file exists, False otherwise.
    """
    base = Path(output_dir)

    # Sanitize the experiment name exactly like the output writer does:
    # keep alphanumerics, spaces, dashes and underscores; map dots to
    # underscores; drop everything else; then collapse spaces.
    kept = []
    for ch in experiment.name:
        if ch.isalnum() or ch in (" ", "-", "_"):
            kept.append(ch)
        elif ch == ".":
            kept.append("_")
    sanitized = "".join(kept).rstrip().replace(" ", "_")

    # Check for the consolidated file
    target = base / f"{sanitized}.experiment.json"
    if not target.exists():
        return False

    self.log.info(f"Experiment file already exists: {target}")
    return True
|
|
757
|
+
|
|
758
|
+
def load_data_from_source(
    self, input_path: str, experiment_type: str = "qa", queries_source: str = None
) -> List[Dict]:
    """Load data from various input sources: groundtruth files, transcript files, or directories.

    Dispatches on what *input_path* points at: a ``.json`` file is treated as
    a groundtruth export, any other file as a raw transcript, and a directory
    as a folder of transcripts.

    Raises:
        FileNotFoundError: If the path is neither a file nor a directory.
    """
    path = Path(input_path)

    self.log.info(f"Loading data from source: {path}, type={experiment_type}")
    self.log.info(
        f"Input path exists: {path.exists()}, is_file: {path.is_file()}, suffix: {path.suffix}"
    )

    try:
        if path.is_file():
            # JSON files carry groundtruth; everything else is a transcript.
            if path.suffix == ".json":
                return self._load_from_groundtruth_file(
                    str(path), experiment_type
                )
            return self._load_from_transcript_file(
                str(path), experiment_type, queries_source
            )
        if path.is_dir():
            return self._load_from_transcript_directory(
                str(path), experiment_type, queries_source
            )
        raise FileNotFoundError(f"Input path not found: {path}")
    except Exception as e:
        # Log and re-raise so callers see the original failure.
        self.log.error(f"Error loading data from source: {e}")
        raise
|
|
792
|
+
|
|
793
|
+
def _load_queries_from_groundtruth(self, groundtruth_file: str) -> List[str]:
|
|
794
|
+
"""Extract queries from a groundtruth file for use with raw transcripts."""
|
|
795
|
+
with open(groundtruth_file, "r", encoding="utf-8") as f:
|
|
796
|
+
groundtruth_data = json.load(f)
|
|
797
|
+
|
|
798
|
+
analysis = groundtruth_data.get("analysis", {})
|
|
799
|
+
qa_pairs = analysis.get("qa_pairs", [])
|
|
800
|
+
|
|
801
|
+
if not qa_pairs:
|
|
802
|
+
raise ValueError(
|
|
803
|
+
f"No QA pairs found in groundtruth file: {groundtruth_file}"
|
|
804
|
+
)
|
|
805
|
+
|
|
806
|
+
queries = []
|
|
807
|
+
for qa_pair in qa_pairs:
|
|
808
|
+
query = qa_pair.get("query", qa_pair.get("question", ""))
|
|
809
|
+
if query:
|
|
810
|
+
queries.append(query)
|
|
811
|
+
|
|
812
|
+
if not queries:
|
|
813
|
+
raise ValueError(
|
|
814
|
+
f"No valid queries found in groundtruth file: {groundtruth_file}"
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
return queries
|
|
818
|
+
|
|
819
|
+
def _get_default_queries(self) -> List[str]:
|
|
820
|
+
"""Return default questions for QA experiments on raw transcripts."""
|
|
821
|
+
return [
|
|
822
|
+
"What were the main topics discussed in this meeting?",
|
|
823
|
+
"What action items were assigned and to whom?",
|
|
824
|
+
"What decisions were made during this meeting?",
|
|
825
|
+
"Who participated in this meeting and what were their roles?",
|
|
826
|
+
"What are the next steps or follow-up items?",
|
|
827
|
+
]
|
|
828
|
+
|
|
829
|
+
def _load_from_groundtruth_file(
|
|
830
|
+
self, groundtruth_file: str, experiment_type: str
|
|
831
|
+
) -> List[Dict]:
|
|
832
|
+
"""Load data from a groundtruth JSON file (individual or consolidated)."""
|
|
833
|
+
self.log.info(
|
|
834
|
+
f"Loading groundtruth file: {groundtruth_file} for experiment type: {experiment_type}"
|
|
835
|
+
)
|
|
836
|
+
|
|
837
|
+
with open(groundtruth_file, "r", encoding="utf-8") as f:
|
|
838
|
+
groundtruth_data = json.load(f)
|
|
839
|
+
|
|
840
|
+
analysis = groundtruth_data.get("analysis", {})
|
|
841
|
+
metadata = groundtruth_data.get("metadata", {})
|
|
842
|
+
|
|
843
|
+
# Check if this is a consolidated groundtruth file
|
|
844
|
+
is_consolidated = "consolidated_from" in metadata or "source_files" in metadata
|
|
845
|
+
|
|
846
|
+
self.log.info(f"Metadata keys: {list(metadata.keys())}")
|
|
847
|
+
self.log.info(f"Is consolidated: {is_consolidated}")
|
|
848
|
+
|
|
849
|
+
if is_consolidated:
|
|
850
|
+
return self._load_from_consolidated_groundtruth(
|
|
851
|
+
groundtruth_data, experiment_type
|
|
852
|
+
)
|
|
853
|
+
else:
|
|
854
|
+
return self._load_from_individual_groundtruth(
|
|
855
|
+
groundtruth_data, experiment_type
|
|
856
|
+
)
|
|
857
|
+
|
|
858
|
+
def _load_from_individual_groundtruth(
|
|
859
|
+
self, groundtruth_data: Dict, experiment_type: str
|
|
860
|
+
) -> List[Dict]:
|
|
861
|
+
"""Load data from an individual groundtruth file."""
|
|
862
|
+
analysis = groundtruth_data.get("analysis", {})
|
|
863
|
+
metadata = groundtruth_data.get("metadata", {})
|
|
864
|
+
|
|
865
|
+
if experiment_type == "qa":
|
|
866
|
+
# Extract QA pairs from groundtruth
|
|
867
|
+
qa_pairs = analysis.get("qa_pairs", [])
|
|
868
|
+
|
|
869
|
+
if not qa_pairs:
|
|
870
|
+
raise ValueError(
|
|
871
|
+
"No QA pairs found in groundtruth file for QA experiment"
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
data = []
|
|
875
|
+
for qa_pair in qa_pairs:
|
|
876
|
+
data.append(
|
|
877
|
+
{
|
|
878
|
+
"type": "qa",
|
|
879
|
+
"query": qa_pair.get("query", qa_pair.get("question", "")),
|
|
880
|
+
"ground_truth": qa_pair.get(
|
|
881
|
+
"response", qa_pair.get("answer", "")
|
|
882
|
+
),
|
|
883
|
+
}
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
return data
|
|
887
|
+
|
|
888
|
+
elif experiment_type == "summarization":
|
|
889
|
+
# Extract transcript content and summaries from groundtruth
|
|
890
|
+
summaries = analysis.get("summaries", {})
|
|
891
|
+
|
|
892
|
+
if not summaries:
|
|
893
|
+
raise ValueError(
|
|
894
|
+
"No summaries found in groundtruth file for summarization experiment"
|
|
895
|
+
)
|
|
896
|
+
|
|
897
|
+
# Get the source transcript content
|
|
898
|
+
source_file = metadata.get("source_file", "")
|
|
899
|
+
|
|
900
|
+
# Read transcript content
|
|
901
|
+
if not source_file or not Path(source_file).exists():
|
|
902
|
+
raise ValueError(f"Source transcript file not found: {source_file}")
|
|
903
|
+
|
|
904
|
+
with open(source_file, "r", encoding="utf-8") as f:
|
|
905
|
+
transcript_content = f.read().strip()
|
|
906
|
+
|
|
907
|
+
if not transcript_content:
|
|
908
|
+
raise ValueError(f"Empty transcript file: {source_file}")
|
|
909
|
+
|
|
910
|
+
data = [
|
|
911
|
+
{
|
|
912
|
+
"type": "summarization",
|
|
913
|
+
"transcript": transcript_content,
|
|
914
|
+
"groundtruth_summaries": summaries,
|
|
915
|
+
"source_file": source_file,
|
|
916
|
+
}
|
|
917
|
+
]
|
|
918
|
+
|
|
919
|
+
return data
|
|
920
|
+
|
|
921
|
+
else:
|
|
922
|
+
raise ValueError(f"Unsupported experiment type: {experiment_type}")
|
|
923
|
+
|
|
924
|
+
def _load_from_consolidated_groundtruth(
    self, groundtruth_data: Dict, experiment_type: str
) -> List[Dict]:
    """Load data from a consolidated groundtruth file.

    A consolidated file groups groundtruth for several source documents; the
    metadata "source_files" list maps each transcript_id to its original
    source file on disk.

    Args:
        groundtruth_data: Parsed consolidated groundtruth JSON.
        experiment_type: "qa" or "summarization".

    Returns:
        List of experiment data items. QA items may include the loaded
        document content; summarization items include the transcript text.

    Raises:
        ValueError: If no usable items can be produced, or the experiment
            type is unsupported.
    """
    analysis = groundtruth_data.get("analysis", {})
    metadata = groundtruth_data.get("metadata", {})

    self.log.info(
        f"Loading consolidated groundtruth for experiment type: {experiment_type}"
    )
    self.log.info(f"Metadata keys: {list(metadata.keys())}")
    self.log.info(f"Analysis keys: {list(analysis.keys())}")

    if experiment_type == "qa":
        # For consolidated QA files, extract QA pairs from all items
        data = []

        # Get source file information from metadata for document loading:
        # transcript_id -> source file path
        source_files_map = {}
        if "source_files" in metadata:
            for source_info in metadata["source_files"]:
                doc_id = source_info.get("transcript_id", "")
                source_file = source_info.get("source_file", "")
                if doc_id and source_file:
                    source_files_map[doc_id] = source_file

        # Cache for document content to avoid loading same document multiple times
        document_content_cache = {}

        # Check if analysis contains direct qa_pairs (can be dict or list)
        if "qa_pairs" in analysis:
            qa_pairs = analysis["qa_pairs"]

            # Handle dict format (consolidated files key by document ID)
            if isinstance(qa_pairs, dict):
                for doc_id, doc_qa_pairs in qa_pairs.items():
                    # Try to load the source document content; failures fall
                    # back to an empty string rather than aborting the load.
                    document_content = ""
                    source_file = source_files_map.get(doc_id, "")

                    # Check cache first
                    if source_file in document_content_cache:
                        document_content = document_content_cache[source_file]
                    elif source_file:
                        source_path = Path(source_file)
                        if source_path.exists():
                            try:
                                # Handle PDF files via the local extractor
                                if source_path.suffix.lower() == ".pdf":
                                    self.log.info(
                                        f"PDF file detected: {source_path}"
                                    )
                                    # Use local PDF extraction
                                    document_content = self._extract_text_from_pdf(
                                        str(source_path)
                                    )
                                # Handle text files
                                else:
                                    with open(
                                        source_path, "r", encoding="utf-8"
                                    ) as f:
                                        document_content = f.read()

                                # Cache the content
                                document_content_cache[source_file] = (
                                    document_content
                                )

                            except Exception as e:
                                self.log.warning(
                                    f"Failed to load document {source_path}: {e}"
                                )
                                document_content = ""
                        else:
                            self.log.warning(
                                f"Source document not found: {source_path}"
                            )

                    for qa_pair in doc_qa_pairs:
                        data.append(
                            {
                                "type": "qa",
                                "query": qa_pair.get(
                                    "query", qa_pair.get("question", "")
                                ),
                                "ground_truth": qa_pair.get(
                                    "response", qa_pair.get("answer", "")
                                ),
                                "source_item": doc_id,
                                "document_content": document_content,
                                "source_file": source_file,
                            }
                        )
            # Handle list format (non-consolidated files); these items carry
            # no source document association.
            elif isinstance(qa_pairs, list):
                for qa_pair in qa_pairs:
                    data.append(
                        {
                            "type": "qa",
                            "query": qa_pair.get(
                                "query", qa_pair.get("question", "")
                            ),
                            "ground_truth": qa_pair.get(
                                "response", qa_pair.get("answer", "")
                            ),
                        }
                    )

        # Also check for nested structure (qa_pairs within individual summaries)
        summaries = analysis.get("summaries", {})
        for item_id, item_data in summaries.items():
            if "qa_pairs" in item_data:
                for qa_pair in item_data["qa_pairs"]:
                    data.append(
                        {
                            "type": "qa",
                            "query": qa_pair.get(
                                "query", qa_pair.get("question", "")
                            ),
                            "ground_truth": qa_pair.get(
                                "response", qa_pair.get("answer", "")
                            ),
                            "source_item": item_id,
                        }
                    )

        if not data:
            raise ValueError(
                "No QA pairs found in consolidated groundtruth file for QA experiment"
            )

        return data

    elif experiment_type == "summarization":
        # For consolidated summarization files, create separate items for each source
        summaries = analysis.get("summaries", {})

        self.log.info(f"Found {len(summaries)} summaries in consolidated file")

        if not summaries:
            raise ValueError(
                "No summaries found in consolidated groundtruth file for summarization experiment"
            )

        data = []
        source_files_info = metadata.get("source_files", [])

        self.log.info(f"Found {len(source_files_info)} source files in metadata")

        # Create a mapping of transcript_id to source file info
        source_file_map = {}
        for source_info in source_files_info:
            transcript_id = source_info.get("transcript_id", "")
            source_file_map[transcript_id] = source_info
            self.log.info(
                f"Mapped transcript_id '{transcript_id}' to source file '{source_info.get('source_file', '')}'"
            )

        self.log.info(
            f"Created source file map with {len(source_file_map)} entries"
        )

        for item_id, item_summaries in summaries.items():
            # Get source file information; items without a resolvable,
            # non-empty transcript on disk are skipped with a warning.
            source_info = source_file_map.get(item_id, {})
            source_file = source_info.get("source_file", "")

            self.log.info(
                f"Processing item {item_id}, source file: '{source_file}'"
            )

            if not source_file:
                self.log.warning(
                    f"No source file found for item {item_id}, skipping"
                )
                continue

            # Normalize path separators for current platform
            source_file = source_file.replace("\\", "/")
            source_path = Path(source_file)

            self.log.info(
                f"Normalized source path: '{source_path}', exists: {source_path.exists()}"
            )

            # Read transcript content
            if not source_path.exists():
                self.log.warning(
                    f"Source transcript file not found: {source_path}, skipping {item_id}"
                )
                continue

            with open(source_path, "r", encoding="utf-8") as f:
                transcript_content = f.read().strip()

            if not transcript_content:
                self.log.warning(
                    f"Empty transcript file: {source_path}, skipping {item_id}"
                )
                continue

            data.append(
                {
                    "type": "summarization",
                    "transcript": transcript_content,
                    "groundtruth_summaries": item_summaries,
                    "source_file": str(source_path),
                    "item_id": item_id,
                }
            )

        if not data:
            raise ValueError(
                "No valid data items found in consolidated groundtruth file for summarization experiment"
            )

        return data

    else:
        raise ValueError(f"Unsupported experiment type: {experiment_type}")
|
|
1144
|
+
|
|
1145
|
+
def _load_from_transcript_file(
|
|
1146
|
+
self, transcript_file: str, experiment_type: str, queries_source: str = None
|
|
1147
|
+
) -> List[Dict]:
|
|
1148
|
+
"""Load data from a single transcript file."""
|
|
1149
|
+
with open(transcript_file, "r", encoding="utf-8") as f:
|
|
1150
|
+
transcript_content = f.read().strip()
|
|
1151
|
+
|
|
1152
|
+
if not transcript_content:
|
|
1153
|
+
raise ValueError(f"Empty transcript file: {transcript_file}")
|
|
1154
|
+
|
|
1155
|
+
if experiment_type == "qa":
|
|
1156
|
+
# Get queries from groundtruth source
|
|
1157
|
+
if not queries_source:
|
|
1158
|
+
queries = self._get_default_queries()
|
|
1159
|
+
else:
|
|
1160
|
+
queries = self._load_queries_from_groundtruth(queries_source)
|
|
1161
|
+
self.log.info(
|
|
1162
|
+
f"Loaded {len(queries)} queries from groundtruth source: {queries_source}"
|
|
1163
|
+
)
|
|
1164
|
+
|
|
1165
|
+
# For QA experiments on raw transcripts, we can't provide ground truth
|
|
1166
|
+
# The experiment will generate responses that can be manually evaluated
|
|
1167
|
+
return [
|
|
1168
|
+
{
|
|
1169
|
+
"type": "qa_raw",
|
|
1170
|
+
"transcript": transcript_content,
|
|
1171
|
+
"source_file": transcript_file,
|
|
1172
|
+
"queries": queries,
|
|
1173
|
+
}
|
|
1174
|
+
]
|
|
1175
|
+
|
|
1176
|
+
elif experiment_type == "summarization":
|
|
1177
|
+
return [
|
|
1178
|
+
{
|
|
1179
|
+
"type": "summarization",
|
|
1180
|
+
"transcript": transcript_content,
|
|
1181
|
+
"source_file": transcript_file,
|
|
1182
|
+
}
|
|
1183
|
+
]
|
|
1184
|
+
|
|
1185
|
+
else:
|
|
1186
|
+
raise ValueError(f"Unsupported experiment type: {experiment_type}")
|
|
1187
|
+
|
|
1188
|
+
def _load_from_transcript_directory(
|
|
1189
|
+
self, transcript_dir: str, experiment_type: str, queries_source: str = None
|
|
1190
|
+
) -> List[Dict]:
|
|
1191
|
+
"""Load data from a directory of transcript files."""
|
|
1192
|
+
transcript_dir = Path(transcript_dir)
|
|
1193
|
+
|
|
1194
|
+
# Find all text files in directory (recursively)
|
|
1195
|
+
transcript_files = list(transcript_dir.rglob("*.txt"))
|
|
1196
|
+
if not transcript_files:
|
|
1197
|
+
raise ValueError(f"No .txt files found in directory: {transcript_dir}")
|
|
1198
|
+
|
|
1199
|
+
data = []
|
|
1200
|
+
for transcript_file in transcript_files:
|
|
1201
|
+
file_data = self._load_from_transcript_file(
|
|
1202
|
+
str(transcript_file), experiment_type, queries_source
|
|
1203
|
+
)
|
|
1204
|
+
data.extend(file_data)
|
|
1205
|
+
|
|
1206
|
+
return data
|
|
1207
|
+
|
|
1208
|
+
def run_experiment(
    self,
    experiment: ExperimentConfig,
    data_items: List[Dict],
    output_dir: str,
    delay_seconds: float = 1.0,
) -> str:
    """Run a single experiment with the given data items.

    Each data item is dispatched on its "type" key ("qa", "qa_raw", or
    "summarization") and on ``experiment.llm_type`` ("claude" or "lemonade").
    Per-item results are written incrementally to an ``.intermediate``
    directory for crash recovery, then consolidated into one or more
    ``*.experiment.json`` files.

    Args:
        experiment: Experiment configuration (LLM type/model, prompts,
            sampling parameters).
        data_items: Items produced by the data loaders.
        output_dir: Directory where result files are written.
        delay_seconds: Pause between items to avoid rate limiting.

    Returns:
        Path (as str) of the consolidated or single result file.
    """
    # Start timing the experiment
    experiment_start_time = time.time()

    self.log.info(
        f"Running experiment: {experiment.name} (type: {experiment.experiment_type})"
    )

    # Create LLM client
    client = self.create_llm_client(experiment)

    # Set up output directories for incremental writing
    output_base_path = Path(output_dir)
    output_base_path.mkdir(parents=True, exist_ok=True)

    # Generate safe filename from experiment name: keep alnum/space/-/_,
    # map "." to "_", drop everything else, then spaces become "_".
    safe_name = "".join(
        c if (c.isalnum() or c in (" ", "-", "_")) else "_" if c == "." else ""
        for c in experiment.name
    ).rstrip()
    safe_name = safe_name.replace(" ", "_")

    # Create intermediate results directory
    intermediate_dir = output_base_path / f"{safe_name}.intermediate"
    intermediate_dir.mkdir(parents=True, exist_ok=True)

    # Process each data item
    results = []
    total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
    total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
    errors = []
    item_timings = []  # Track timing for each item

    for i, data_item in enumerate(data_items):
        item_start_time = time.time()
        data_type = data_item["type"]
        self.log.info(
            f"Processing item {i+1}/{len(data_items)} (type: {data_type})"
        )

        # Process based on experiment and data type
        if data_type == "qa":
            # Process Q&A pair with ground truth
            # Check if document content is available
            document_content = data_item.get("document_content", "")

            # NOTE(review): if llm_type is neither "claude" nor "lemonade",
            # `result` is never assigned and the usage accumulation below
            # raises NameError — presumably llm_type is validated earlier;
            # confirm upstream.
            if experiment.llm_type.lower() == "claude":
                if document_content:
                    # Include document context with the question
                    result = self.process_question_claude(
                        client,
                        data_item["query"],
                        experiment.system_prompt,
                        document_content,
                    )
                else:
                    result = self.process_question_claude(
                        client, data_item["query"], experiment.system_prompt
                    )
            elif experiment.llm_type.lower() == "lemonade":
                if document_content:
                    result = self.process_question_lemonade(
                        client,
                        data_item["query"],
                        experiment.system_prompt,
                        experiment.max_tokens,
                        experiment.temperature,
                        document_content,
                    )
                else:
                    result = self.process_question_lemonade(
                        client,
                        data_item["query"],
                        experiment.system_prompt,
                        experiment.max_tokens,
                        experiment.temperature,
                    )

            # Create QA result entry
            result_entry = {
                "query": data_item["query"],
                "ground_truth": data_item["ground_truth"],
                "response": result["response"],
            }

            # Add thinking tokens if present
            if "thinking" in result and result["thinking"]:
                result_entry["thinking"] = result["thinking"]

        elif data_type == "qa_raw":
            # Process raw transcript with predefined questions.
            # Usage/cost/errors for all queries are aggregated into one
            # `total_result` so the item contributes a single accumulation
            # entry below.
            qa_results = []
            total_result = {
                "response": "",
                "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
                "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
                "error": None,
            }

            for query in data_item["queries"]:
                if experiment.llm_type.lower() == "claude":
                    # Create context-aware prompt with transcript
                    context_prompt = f"{experiment.system_prompt}\n\nTranscript:\n{data_item['transcript']}\n\nQuestion: {query}\n\nAnswer:"
                    # For Claude, we can use the context as system prompt
                    query_result = {
                        "response": "",
                        "usage": {
                            "input_tokens": 0,
                            "output_tokens": 0,
                            "total_tokens": 0,
                        },
                        "cost": {
                            "input_cost": 0.0,
                            "output_cost": 0.0,
                            "total_cost": 0.0,
                        },
                        "error": None,
                    }
                    try:
                        response_data = client.get_completion_with_usage(
                            context_prompt
                        )
                        # `content` may be a list of content blocks or a
                        # single object; either way prefer its .text attr.
                        response = response_data["content"]
                        if isinstance(response, list):
                            response_text = (
                                response[0].text
                                if hasattr(response[0], "text")
                                else str(response[0])
                            )
                        else:
                            response_text = (
                                response.text
                                if hasattr(response, "text")
                                else str(response)
                            )

                        query_result = {
                            "response": response_text.strip(),
                            "usage": response_data["usage"],
                            "cost": response_data["cost"],
                            "error": None,
                        }
                    except Exception as e:
                        self.log.error(f"Error processing QA with Claude: {e}")
                        query_result["response"] = f"ERROR: {str(e)}"
                        query_result["error"] = str(e)

                elif experiment.llm_type.lower() == "lemonade":
                    # For Lemonade, use ChatML formatting
                    query_result = {
                        "response": "",
                        "usage": {
                            "input_tokens": 0,
                            "output_tokens": 0,
                            "total_tokens": 0,
                        },
                        "cost": {
                            "input_cost": 0.0,
                            "output_cost": 0.0,
                            "total_cost": 0.0,
                        },
                        "error": None,
                    }
                    try:
                        # Format with ChatML template for QA tasks, transcript as document context
                        formatted_prompt = format_prompt_with_template(
                            model=client.model,
                            system_prompt=experiment.system_prompt,
                            user_content=query,
                            document_content=data_item["transcript"],
                            use_chat_template=True,  # Use chat template for QA tasks
                        )

                        response_data = client.completions(
                            model=client.model,  # Use model from experiment config
                            prompt=formatted_prompt,
                            max_tokens=experiment.max_tokens,
                            temperature=experiment.temperature,
                            stream=False,
                        )

                        # Extract text from the response
                        response_text = ""
                        if "choices" in response_data and response_data["choices"]:
                            response_text = response_data["choices"][0].get(
                                "text", ""
                            )

                        # Extract thinking tokens if present
                        extracted = extract_thinking_from_response(response_text)

                        query_result["response"] = extracted["response"]
                        # Store thinking for this query if present
                        if extracted["thinking"]:
                            query_result["thinking"] = extracted["thinking"]

                    except Exception as e:
                        self.log.error(f"Error processing QA with Lemonade: {e}")
                        query_result["response"] = f"ERROR: {str(e)}"
                        query_result["error"] = str(e)

                qa_result_item = {
                    "query": query,
                    "response": query_result["response"],
                }
                # Add thinking if present
                if "thinking" in query_result and query_result["thinking"]:
                    qa_result_item["thinking"] = query_result["thinking"]
                qa_results.append(qa_result_item)

                # Accumulate usage/cost
                if query_result["usage"]:
                    for key in total_result["usage"]:
                        total_result["usage"][key] += query_result["usage"].get(
                            key, 0
                        )
                if query_result["cost"]:
                    for key in total_result["cost"]:
                        total_result["cost"][key] += query_result["cost"].get(
                            key, 0.0
                        )
                if query_result["error"]:
                    # Join multiple per-query errors with "; " into one string.
                    if total_result["error"]:
                        total_result["error"] += f"; {query_result['error']}"
                    else:
                        total_result["error"] = query_result["error"]

            result = total_result
            result_entry = {
                # Only a 500-char preview of the transcript is stored.
                "transcript": (
                    data_item["transcript"][:500] + "..."
                    if len(data_item["transcript"]) > 500
                    else data_item["transcript"]
                ),
                "source_file": data_item.get("source_file", ""),
                "qa_results": qa_results,
            }

        elif data_type == "summarization":
            # Process summarization task using independent calls for each component
            if experiment.llm_type.lower() == "claude":
                combined = experiment.parameters.get("combined_prompt", False)
                result = self.process_summarization_claude(
                    client,
                    data_item["transcript"],
                    experiment.system_prompt,
                    combined,
                )
            elif experiment.llm_type.lower() == "lemonade":
                combined = experiment.parameters.get("combined_prompt", False)
                # Extract parameters to pass (excluding combined_prompt)
                extra_params = {
                    k: v
                    for k, v in experiment.parameters.items()
                    if k != "combined_prompt"
                }
                result = self.process_summarization_lemonade(
                    client,
                    data_item["transcript"],
                    experiment.system_prompt,
                    experiment.max_tokens,
                    experiment.temperature,
                    combined,
                    extra_params,
                )

            # Use the structured response directly from independent calls
            generated_summaries = result["response"]

            # Create summarization result entry
            result_entry = {
                "transcript": (
                    data_item["transcript"][:500] + "..."
                    if len(data_item["transcript"]) > 500
                    else data_item["transcript"]
                ),
                "generated_summaries": generated_summaries,
                "source_file": data_item.get("source_file", ""),
            }

            # Add ground truth summaries if available (from groundtruth files)
            if "groundtruth_summaries" in data_item:
                result_entry["groundtruth_summaries"] = data_item[
                    "groundtruth_summaries"
                ]

            # Add thinking tokens if present (for combined mode)
            if "thinking" in result and result["thinking"]:
                result_entry["thinking"] = result["thinking"]

        else:
            self.log.error(f"Unsupported data type: {data_type}")
            continue

        # Accumulate usage and cost data
        if result["usage"]:
            for key in total_usage:
                total_usage[key] += result["usage"].get(key, 0)
        if result["cost"]:
            for key in total_cost:
                total_cost[key] += result["cost"].get(key, 0.0)

        if result["error"]:
            errors.append(f"Item {i+1}: {result['error']}")

        # Add processing time to result entry
        item_time = time.time() - item_start_time
        result_entry["processing_time_seconds"] = round(item_time, 3)
        item_timings.append(item_time)

        results.append(result_entry)

        # Write intermediate result immediately for crash recovery
        try:
            intermediate_file = (
                intermediate_dir / f"item_{i+1:04d}_{data_type}.json"
            )
            intermediate_data = {
                "item_index": i,
                "data_type": data_type,
                "data_item": data_item,
                "result": result_entry,
                "usage": result.get("usage", {}),
                "cost": result.get("cost", {}),
                "error": result.get("error"),
                "timestamp": datetime.now().isoformat(),
                "processing_time_seconds": round(item_time, 3),
            }

            with open(intermediate_file, "w", encoding="utf-8") as f:
                json.dump(intermediate_data, f, indent=2)

            # Update progress file
            progress_file = intermediate_dir / "progress.json"
            progress_data = {
                "experiment_name": experiment.name,
                "total_items": len(data_items),
                "completed_items": i + 1,
                "progress_percent": round((i + 1) / len(data_items) * 100, 1),
                "total_usage": total_usage.copy(),
                "total_cost": total_cost.copy(),
                "errors_count": len(errors),
                "last_updated": datetime.now().isoformat(),
                "estimated_remaining_time": None,
            }

            # Calculate estimated remaining time (needs at least 2 samples)
            if i > 0:
                avg_time_per_item = sum(item_timings) / len(item_timings)
                remaining_items = len(data_items) - (i + 1)
                estimated_remaining = remaining_items * avg_time_per_item
                progress_data["estimated_remaining_time"] = round(
                    estimated_remaining, 1
                )

            with open(progress_file, "w", encoding="utf-8") as f:
                json.dump(progress_data, f, indent=2)

            self.log.info(
                f"Progress: {i+1}/{len(data_items)} items completed ({progress_data['progress_percent']}%)"
            )

        except Exception as e:
            # Incremental bookkeeping is best-effort; never abort the run.
            self.log.warning(f"Failed to write intermediate result {i+1}: {e}")

        # Add delay between requests to avoid rate limiting
        if delay_seconds > 0 and i < len(data_items) - 1:
            time.sleep(delay_seconds)

    # Calculate total experiment time
    total_experiment_time = time.time() - experiment_start_time

    # Determine inference type (cloud vs local)
    inference_type = "cloud" if experiment.llm_type.lower() == "claude" else "local"

    # Create output data in format expected by eval tool
    output_data = {
        "metadata": {
            "experiment_name": experiment.name,
            "experiment_type": experiment.experiment_type,
            "llm_type": experiment.llm_type,
            "model": experiment.model,
            "inference_type": inference_type,  # Add inference type
            "system_prompt": experiment.system_prompt,
            "max_tokens": experiment.max_tokens,
            "temperature": experiment.temperature,
            "parameters": experiment.parameters,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "similarity_threshold": 0.7,  # Default threshold for eval
            "total_items": len(data_items),
            "total_usage": total_usage,
            "total_cost": total_cost,
            "errors": errors,
            "timing": {
                "total_experiment_time_seconds": round(total_experiment_time, 3),
                "per_item_times_seconds": [round(t, 3) for t in item_timings],
                "average_per_item_seconds": (
                    round(np.mean(item_timings), 3) if item_timings else 0
                ),
                "max_per_item_seconds": (
                    round(max(item_timings), 3) if item_timings else 0
                ),
                "min_per_item_seconds": (
                    round(min(item_timings), 3) if item_timings else 0
                ),
            },
        },
        "analysis": {},
    }

    # Set analysis data based on experiment type and data type
    if experiment.experiment_type == "qa":
        # Check if we have traditional QA results or raw transcript QA results
        if results and "qa_results" in results[0]:
            output_data["analysis"]["transcript_qa_results"] = results
        else:
            output_data["analysis"]["qa_results"] = results
    elif experiment.experiment_type == "summarization":
        output_data["analysis"]["summarization_results"] = results

    # Determine output structure based on data items
    # (output_base_path and safe_name already created earlier for incremental writing)

    # Check if we have multiple items with individual source files (hierarchical structure needed)
    has_individual_items = any(
        "source_file" in result for result in results if isinstance(result, dict)
    )
    has_item_ids = any(
        "item_id" in data_item
        for data_item in data_items
        if isinstance(data_item, dict)
    )

    if has_individual_items or has_item_ids:
        # Create hierarchical structure - save individual files and consolidated
        individual_files = self._save_individual_experiment_files(
            output_data, data_items, results, output_base_path, safe_name
        )

        # Create consolidated file at root
        consolidated_filename = f"{safe_name}.experiment.json"
        consolidated_path = output_base_path / consolidated_filename

        # Add consolidation metadata
        output_data["metadata"]["consolidated_from"] = len(individual_files)
        output_data["metadata"]["individual_files"] = individual_files

        with open(consolidated_path, "w", encoding="utf-8") as f:
            json.dump(output_data, f, indent=2)

        self.log.info(
            f"Consolidated experiment results saved to: {consolidated_path}"
        )
        self.log.info(f"Individual experiment files: {len(individual_files)}")

        # Clean up intermediate files after successful completion
        self._cleanup_intermediate_files(intermediate_dir)

        return str(consolidated_path)
    else:
        # Single file output (traditional behavior)
        result_filename = f"{safe_name}.experiment.json"
        result_path = output_base_path / result_filename

        with open(result_path, "w", encoding="utf-8") as f:
            json.dump(output_data, f, indent=2)

        self.log.info(f"Experiment results saved to: {result_path}")

        # Clean up intermediate files after successful completion
        self._cleanup_intermediate_files(intermediate_dir)

        return str(result_path)
|
|
1688
|
+
|
|
1689
|
+
def _save_individual_experiment_files(
|
|
1690
|
+
self,
|
|
1691
|
+
base_output_data: Dict,
|
|
1692
|
+
data_items: List[Dict],
|
|
1693
|
+
results: List[Dict],
|
|
1694
|
+
output_base_path: Path,
|
|
1695
|
+
safe_experiment_name: str,
|
|
1696
|
+
) -> List[str]:
|
|
1697
|
+
"""Save individual experiment files maintaining directory hierarchy."""
|
|
1698
|
+
individual_files = []
|
|
1699
|
+
|
|
1700
|
+
for i, (data_item, result) in enumerate(zip(data_items, results)):
|
|
1701
|
+
# Determine output path based on source file or item_id
|
|
1702
|
+
if "item_id" in data_item:
|
|
1703
|
+
# From consolidated groundtruth - use item_id to determine path
|
|
1704
|
+
item_id = data_item["item_id"]
|
|
1705
|
+
# Create directory structure like emails/file_name or meetings/file_name
|
|
1706
|
+
if "/" in item_id:
|
|
1707
|
+
relative_dir = item_id.split("/")[0] if "/" in item_id else ""
|
|
1708
|
+
file_base = item_id.split("/")[-1] if "/" in item_id else item_id
|
|
1709
|
+
else:
|
|
1710
|
+
# Guess directory from item_id pattern
|
|
1711
|
+
if "email" in item_id.lower():
|
|
1712
|
+
relative_dir = "emails"
|
|
1713
|
+
elif "meeting" in item_id.lower():
|
|
1714
|
+
relative_dir = "meetings"
|
|
1715
|
+
else:
|
|
1716
|
+
relative_dir = "misc"
|
|
1717
|
+
file_base = item_id
|
|
1718
|
+
elif "source_file" in data_item:
|
|
1719
|
+
# From individual files - extract relative path from source file
|
|
1720
|
+
source_file = Path(data_item["source_file"])
|
|
1721
|
+
if "test_data" in source_file.parts:
|
|
1722
|
+
# Extract relative path from test_data structure
|
|
1723
|
+
test_data_index = source_file.parts.index("test_data")
|
|
1724
|
+
relative_parts = source_file.parts[test_data_index + 1 :]
|
|
1725
|
+
if len(relative_parts) > 1:
|
|
1726
|
+
relative_dir = "/".join(relative_parts[:-1])
|
|
1727
|
+
file_base = source_file.stem
|
|
1728
|
+
else:
|
|
1729
|
+
relative_dir = ""
|
|
1730
|
+
file_base = source_file.stem
|
|
1731
|
+
else:
|
|
1732
|
+
relative_dir = ""
|
|
1733
|
+
file_base = source_file.stem
|
|
1734
|
+
else:
|
|
1735
|
+
# Fallback - no hierarchical structure
|
|
1736
|
+
relative_dir = ""
|
|
1737
|
+
file_base = f"item_{i+1}"
|
|
1738
|
+
|
|
1739
|
+
# Create individual output data
|
|
1740
|
+
individual_output_data = {
|
|
1741
|
+
"metadata": base_output_data["metadata"].copy(),
|
|
1742
|
+
"analysis": {},
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
# Adjust metadata for individual file
|
|
1746
|
+
individual_output_data["metadata"]["total_items"] = 1
|
|
1747
|
+
individual_output_data["metadata"]["source_item"] = data_item.get(
|
|
1748
|
+
"item_id", ""
|
|
1749
|
+
)
|
|
1750
|
+
individual_output_data["metadata"]["source_file"] = data_item.get(
|
|
1751
|
+
"source_file", ""
|
|
1752
|
+
)
|
|
1753
|
+
|
|
1754
|
+
# Add single result to analysis
|
|
1755
|
+
if base_output_data["metadata"]["experiment_type"] == "qa":
|
|
1756
|
+
if "qa_results" in result:
|
|
1757
|
+
individual_output_data["analysis"]["transcript_qa_results"] = [
|
|
1758
|
+
result
|
|
1759
|
+
]
|
|
1760
|
+
else:
|
|
1761
|
+
individual_output_data["analysis"]["qa_results"] = [result]
|
|
1762
|
+
elif base_output_data["metadata"]["experiment_type"] == "summarization":
|
|
1763
|
+
individual_output_data["analysis"]["summarization_results"] = [result]
|
|
1764
|
+
|
|
1765
|
+
# Create output directory and file
|
|
1766
|
+
if relative_dir:
|
|
1767
|
+
output_dir = output_base_path / relative_dir
|
|
1768
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
1769
|
+
else:
|
|
1770
|
+
output_dir = output_base_path
|
|
1771
|
+
|
|
1772
|
+
individual_filename = f"{file_base}.{safe_experiment_name}.experiment.json"
|
|
1773
|
+
individual_path = output_dir / individual_filename
|
|
1774
|
+
|
|
1775
|
+
with open(individual_path, "w", encoding="utf-8") as f:
|
|
1776
|
+
json.dump(individual_output_data, f, indent=2)
|
|
1777
|
+
|
|
1778
|
+
# Store relative path for consolidation metadata
|
|
1779
|
+
if relative_dir:
|
|
1780
|
+
relative_path = f"{relative_dir}/{individual_filename}"
|
|
1781
|
+
else:
|
|
1782
|
+
relative_path = individual_filename
|
|
1783
|
+
|
|
1784
|
+
individual_files.append(relative_path)
|
|
1785
|
+
|
|
1786
|
+
return individual_files
|
|
1787
|
+
|
|
1788
|
+
def _cleanup_intermediate_files(self, intermediate_dir: Path) -> None:
|
|
1789
|
+
"""Clean up intermediate files after successful completion."""
|
|
1790
|
+
try:
|
|
1791
|
+
import shutil
|
|
1792
|
+
|
|
1793
|
+
if intermediate_dir.exists():
|
|
1794
|
+
shutil.rmtree(intermediate_dir)
|
|
1795
|
+
self.log.info(f"Cleaned up intermediate files from: {intermediate_dir}")
|
|
1796
|
+
except Exception as e:
|
|
1797
|
+
self.log.warning(
|
|
1798
|
+
f"Failed to clean up intermediate directory {intermediate_dir}: {e}"
|
|
1799
|
+
)
|
|
1800
|
+
|
|
1801
|
+
def run_all_experiments(
    self,
    input_path: str,
    output_dir: str,
    delay_seconds: float = 1.0,
    queries_source: str = None,
    skip_existing: bool = False,
) -> Tuple[List[str], int]:
    """Run all experiments defined in the config file.

    Args:
        input_path: Data source (file or directory) passed to
            ``load_data_from_source`` for each experiment.
        output_dir: Directory where per-experiment result files go.
        delay_seconds: Pause between experiments (and forwarded to
            ``run_experiment`` for per-item delays).
        queries_source: Optional groundtruth queries file forwarded to
            the data loader.
        skip_existing: When True, experiments whose output already exists
            are counted as skipped instead of re-run.

    Returns:
        tuple: (result_files, skipped_count) where result_files is a list of output file paths
        and skipped_count is the number of experiments that were skipped
    """
    # Start timing all experiments
    all_experiments_start_time = time.time()

    self.log.info(
        f"Starting batch experiments with {len(self.experiments)} configurations"
    )

    # Run each experiment
    result_files = []
    skipped_count = 0
    for i, experiment in enumerate(self.experiments):
        # Check if we should skip this experiment
        if skip_existing and self.check_experiment_exists(experiment, output_dir):
            self.log.info(
                f"Skipping experiment {i+1}/{len(self.experiments)}: {experiment.name} (already exists)"
            )
            skipped_count += 1

            # Add the existing file to result_files for consolidated report.
            # NOTE: this safe_name derivation mirrors the one in
            # run_experiment and must stay in sync with it so the skipped
            # path points at the file run_experiment actually wrote.
            output_base_path = Path(output_dir)
            safe_name = "".join(
                (
                    c
                    if (c.isalnum() or c in (" ", "-", "_"))
                    else "_" if c == "." else ""
                )
                for c in experiment.name
            ).rstrip()
            safe_name = safe_name.replace(" ", "_")
            consolidated_filename = f"{safe_name}.experiment.json"
            consolidated_path = output_base_path / consolidated_filename
            result_files.append(str(consolidated_path))
            continue

        self.log.info(
            f"Running experiment {i+1}/{len(self.experiments)}: {experiment.name} (type: {experiment.experiment_type})"
        )

        # Load data from input source based on experiment type.
        # Data is reloaded per experiment because experiment_type changes
        # how the source is interpreted.
        data_items = self.load_data_from_source(
            input_path, experiment.experiment_type, queries_source
        )
        self.log.info(
            f"Loaded {len(data_items)} data items from {input_path} for {experiment.experiment_type} experiment"
        )

        result_file = self.run_experiment(
            experiment, data_items, output_dir, delay_seconds
        )
        result_files.append(result_file)

        # Add delay between experiments
        if delay_seconds > 0 and i < len(self.experiments) - 1:
            self.log.info(f"Waiting {delay_seconds}s before next experiment...")
            time.sleep(delay_seconds)

    # Calculate total time for all experiments
    total_time = time.time() - all_experiments_start_time

    if skipped_count > 0:
        self.log.info(
            f"Completed {len(result_files) - skipped_count} new experiments, skipped {skipped_count} existing"
        )
    else:
        self.log.info(
            f"Completed {len(result_files)} out of {len(self.experiments)} experiments"
        )
    self.log.info(f"Total execution time: {round(total_time, 2)} seconds")

    # Create consolidated experiments report at root level
    # (only worthwhile when more than one result file exists).
    if len(result_files) > 1:
        consolidated_report_path = self._create_consolidated_experiments_report(
            result_files, output_dir, input_path, total_time
        )
        self.log.info(
            f"Consolidated experiments report saved to: {consolidated_report_path}"
        )

    return result_files, skipped_count
|
|
1894
|
+
|
|
1895
|
+
def _create_consolidated_experiments_report(
|
|
1896
|
+
self,
|
|
1897
|
+
result_files: List[str],
|
|
1898
|
+
output_dir: str,
|
|
1899
|
+
input_path: str,
|
|
1900
|
+
total_time: float = None,
|
|
1901
|
+
) -> str:
|
|
1902
|
+
"""Create a consolidated report of all experiments."""
|
|
1903
|
+
output_base_path = Path(output_dir)
|
|
1904
|
+
|
|
1905
|
+
# Load all experiment results
|
|
1906
|
+
all_experiments = []
|
|
1907
|
+
total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
|
1908
|
+
total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
|
|
1909
|
+
|
|
1910
|
+
for result_file in result_files:
|
|
1911
|
+
try:
|
|
1912
|
+
with open(result_file, "r", encoding="utf-8") as f:
|
|
1913
|
+
experiment_data = json.load(f)
|
|
1914
|
+
|
|
1915
|
+
experiment_info = {
|
|
1916
|
+
"experiment_name": experiment_data["metadata"]["experiment_name"],
|
|
1917
|
+
"experiment_type": experiment_data["metadata"]["experiment_type"],
|
|
1918
|
+
"model": experiment_data["metadata"]["model"],
|
|
1919
|
+
"llm_type": experiment_data["metadata"]["llm_type"],
|
|
1920
|
+
"file_path": str(Path(result_file).relative_to(output_base_path)),
|
|
1921
|
+
"timestamp": experiment_data["metadata"]["timestamp"],
|
|
1922
|
+
"total_items": experiment_data["metadata"]["total_items"],
|
|
1923
|
+
"usage": experiment_data["metadata"]["total_usage"],
|
|
1924
|
+
"cost": experiment_data["metadata"]["total_cost"],
|
|
1925
|
+
"individual_files": experiment_data["metadata"].get(
|
|
1926
|
+
"individual_files", []
|
|
1927
|
+
),
|
|
1928
|
+
"consolidated_from": experiment_data["metadata"].get(
|
|
1929
|
+
"consolidated_from", 0
|
|
1930
|
+
),
|
|
1931
|
+
}
|
|
1932
|
+
|
|
1933
|
+
# Include analysis results based on experiment type
|
|
1934
|
+
if "analysis" in experiment_data:
|
|
1935
|
+
analysis = experiment_data["analysis"]
|
|
1936
|
+
if experiment_data["metadata"]["experiment_type"] == "qa":
|
|
1937
|
+
# Include Q&A results
|
|
1938
|
+
if "qa_results" in analysis:
|
|
1939
|
+
experiment_info["qa_results"] = analysis["qa_results"]
|
|
1940
|
+
elif (
|
|
1941
|
+
experiment_data["metadata"]["experiment_type"]
|
|
1942
|
+
== "summarization"
|
|
1943
|
+
):
|
|
1944
|
+
# Include summarization results
|
|
1945
|
+
if "summaries" in analysis:
|
|
1946
|
+
experiment_info["summaries"] = analysis["summaries"]
|
|
1947
|
+
|
|
1948
|
+
all_experiments.append(experiment_info)
|
|
1949
|
+
|
|
1950
|
+
# Accumulate totals
|
|
1951
|
+
for key in total_usage:
|
|
1952
|
+
total_usage[key] += experiment_data["metadata"]["total_usage"].get(
|
|
1953
|
+
key, 0
|
|
1954
|
+
)
|
|
1955
|
+
for key in total_cost:
|
|
1956
|
+
total_cost[key] += experiment_data["metadata"]["total_cost"].get(
|
|
1957
|
+
key, 0.0
|
|
1958
|
+
)
|
|
1959
|
+
|
|
1960
|
+
except Exception as e:
|
|
1961
|
+
self.log.error(f"Error loading experiment file {result_file}: {e}")
|
|
1962
|
+
continue
|
|
1963
|
+
|
|
1964
|
+
# Create consolidated report
|
|
1965
|
+
consolidated_report = {
|
|
1966
|
+
"metadata": {
|
|
1967
|
+
"report_type": "consolidated_experiments",
|
|
1968
|
+
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
|
1969
|
+
"input_source": input_path,
|
|
1970
|
+
"output_directory": output_dir,
|
|
1971
|
+
"total_experiments": len(all_experiments),
|
|
1972
|
+
"total_usage": total_usage,
|
|
1973
|
+
"total_cost": total_cost,
|
|
1974
|
+
},
|
|
1975
|
+
"experiments": all_experiments,
|
|
1976
|
+
}
|
|
1977
|
+
|
|
1978
|
+
# Add total execution time if provided
|
|
1979
|
+
if total_time is not None:
|
|
1980
|
+
consolidated_report["metadata"]["total_execution_time_seconds"] = round(
|
|
1981
|
+
total_time, 3
|
|
1982
|
+
)
|
|
1983
|
+
|
|
1984
|
+
# Save consolidated report
|
|
1985
|
+
consolidated_filename = "consolidated_experiments_report.json"
|
|
1986
|
+
consolidated_path = output_base_path / consolidated_filename
|
|
1987
|
+
|
|
1988
|
+
with open(consolidated_path, "w", encoding="utf-8") as f:
|
|
1989
|
+
json.dump(consolidated_report, f, indent=2)
|
|
1990
|
+
|
|
1991
|
+
return str(consolidated_path)
|
|
1992
|
+
|
|
1993
|
+
def create_sample_config(self, output_path: str):
|
|
1994
|
+
"""Create a sample configuration file."""
|
|
1995
|
+
sample_config = {
|
|
1996
|
+
"description": "Batch experiment configuration for transcript evaluation (both Q&A and summarization)",
|
|
1997
|
+
"experiments": [
|
|
1998
|
+
{
|
|
1999
|
+
"name": "Claude-Sonnet-QA-Standard",
|
|
2000
|
+
"llm_type": "claude",
|
|
2001
|
+
"model": DEFAULT_CLAUDE_MODEL,
|
|
2002
|
+
"experiment_type": "qa",
|
|
2003
|
+
"system_prompt": "You are a helpful assistant that answers questions about meeting transcripts. Provide accurate, concise answers based on the transcript content.",
|
|
2004
|
+
"max_tokens": 512,
|
|
2005
|
+
"temperature": 0.1,
|
|
2006
|
+
"parameters": {},
|
|
2007
|
+
"_comment": "Cloud inference - will incur API costs",
|
|
2008
|
+
},
|
|
2009
|
+
{
|
|
2010
|
+
"name": "Claude-Sonnet-Summarization-Standard",
|
|
2011
|
+
"llm_type": "claude",
|
|
2012
|
+
"model": DEFAULT_CLAUDE_MODEL,
|
|
2013
|
+
"experiment_type": "summarization",
|
|
2014
|
+
"system_prompt": "You are an expert meeting analyst. Analyze the transcript carefully and provide clear, accurate information based on the content.",
|
|
2015
|
+
"max_tokens": 512,
|
|
2016
|
+
"temperature": 0.1,
|
|
2017
|
+
"parameters": {},
|
|
2018
|
+
"_comment": "Cloud inference - will incur API costs",
|
|
2019
|
+
},
|
|
2020
|
+
{
|
|
2021
|
+
"name": "Claude-Sonnet-QA-Detailed",
|
|
2022
|
+
"llm_type": "claude",
|
|
2023
|
+
"model": DEFAULT_CLAUDE_MODEL,
|
|
2024
|
+
"experiment_type": "qa",
|
|
2025
|
+
"system_prompt": "You are an expert meeting analyst. Provide comprehensive, detailed answers about meeting transcripts including context, participants, and implications. Be thorough and precise.",
|
|
2026
|
+
"max_tokens": 1024,
|
|
2027
|
+
"temperature": 0.2,
|
|
2028
|
+
"parameters": {},
|
|
2029
|
+
},
|
|
2030
|
+
{
|
|
2031
|
+
"name": "Lemonade-Llama-QA-Standard",
|
|
2032
|
+
"llm_type": "lemonade",
|
|
2033
|
+
"model": "llama3.2:3b",
|
|
2034
|
+
"experiment_type": "qa",
|
|
2035
|
+
"system_prompt": "Answer questions about meeting transcripts clearly and accurately. Focus on the key information requested.",
|
|
2036
|
+
"max_tokens": 512,
|
|
2037
|
+
"temperature": 0.1,
|
|
2038
|
+
"parameters": {"host": "localhost", "port": 8000},
|
|
2039
|
+
"_comment": "Local inference - FREE, runs on your hardware",
|
|
2040
|
+
},
|
|
2041
|
+
{
|
|
2042
|
+
"name": "Lemonade-Llama-Summarization-Creative",
|
|
2043
|
+
"llm_type": "lemonade",
|
|
2044
|
+
"model": "llama3.2:3b",
|
|
2045
|
+
"experiment_type": "summarization",
|
|
2046
|
+
"system_prompt": "You are a creative meeting analyst. Analyze the transcript thoughtfully and provide insightful information that captures key insights and implications.",
|
|
2047
|
+
"max_tokens": 512,
|
|
2048
|
+
"temperature": 0.7,
|
|
2049
|
+
"parameters": {"host": "localhost", "port": 8000},
|
|
2050
|
+
"_comment": "Local inference - FREE, runs on your hardware",
|
|
2051
|
+
},
|
|
2052
|
+
],
|
|
2053
|
+
}
|
|
2054
|
+
|
|
2055
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
|
2056
|
+
json.dump(sample_config, f, indent=2)
|
|
2057
|
+
|
|
2058
|
+
self.log.info(f"Sample configuration saved to: {output_path}")
|
|
2059
|
+
|
|
2060
|
+
def create_config_from_groundtruth(
|
|
2061
|
+
self, groundtruth_file: str, output_file: str
|
|
2062
|
+
) -> None:
|
|
2063
|
+
"""Create experiment configuration from groundtruth file metadata."""
|
|
2064
|
+
try:
|
|
2065
|
+
# Load groundtruth file
|
|
2066
|
+
with open(groundtruth_file, "r", encoding="utf-8") as f:
|
|
2067
|
+
groundtruth_data = json.load(f)
|
|
2068
|
+
|
|
2069
|
+
metadata = groundtruth_data.get("metadata", {})
|
|
2070
|
+
analysis = groundtruth_data.get("analysis", {})
|
|
2071
|
+
|
|
2072
|
+
# Extract key information
|
|
2073
|
+
use_case = metadata.get("use_case", "qa")
|
|
2074
|
+
original_model = metadata.get("claude_model", DEFAULT_CLAUDE_MODEL)
|
|
2075
|
+
original_prompt = metadata.get("system_prompt", "")
|
|
2076
|
+
max_tokens = metadata.get("max_tokens", 512 if use_case == "qa" else 1024)
|
|
2077
|
+
temperature = metadata.get("temperature", 0.1)
|
|
2078
|
+
|
|
2079
|
+
# Determine appropriate system prompt if not in metadata
|
|
2080
|
+
if not original_prompt:
|
|
2081
|
+
if use_case == "qa":
|
|
2082
|
+
original_prompt = "You are an expert meeting analyst. Answer questions about the transcript accurately and concisely based only on the provided information."
|
|
2083
|
+
elif use_case == "summarization":
|
|
2084
|
+
original_prompt = "You are an expert meeting analyst. Create a concise summary of the transcript including key topics, decisions, and action items."
|
|
2085
|
+
else:
|
|
2086
|
+
original_prompt = "You are an expert analyst. Process the provided content according to the task requirements."
|
|
2087
|
+
|
|
2088
|
+
# Create base experiment configuration
|
|
2089
|
+
experiments = []
|
|
2090
|
+
|
|
2091
|
+
# Original configuration
|
|
2092
|
+
base_name = original_model.replace("claude-", "").replace("-", "-").title()
|
|
2093
|
+
experiments.append(
|
|
2094
|
+
{
|
|
2095
|
+
"name": f"{base_name}-Original",
|
|
2096
|
+
"llm_type": "claude",
|
|
2097
|
+
"model": original_model,
|
|
2098
|
+
"experiment_type": use_case,
|
|
2099
|
+
"system_prompt": original_prompt,
|
|
2100
|
+
"max_tokens": max_tokens,
|
|
2101
|
+
"temperature": temperature,
|
|
2102
|
+
"parameters": {},
|
|
2103
|
+
}
|
|
2104
|
+
)
|
|
2105
|
+
|
|
2106
|
+
# Add model variations with same prompt
|
|
2107
|
+
model_variants = [
|
|
2108
|
+
("claude-3-haiku-20240307", "Haiku"),
|
|
2109
|
+
("claude-3-opus-20240229", "Opus"),
|
|
2110
|
+
(DEFAULT_CLAUDE_MODEL, "Sonnet-4.5"),
|
|
2111
|
+
]
|
|
2112
|
+
|
|
2113
|
+
for model, name in model_variants:
|
|
2114
|
+
if model != original_model: # Don't duplicate original
|
|
2115
|
+
experiments.append(
|
|
2116
|
+
{
|
|
2117
|
+
"name": f"Claude-{name}-Same-Prompt",
|
|
2118
|
+
"llm_type": "claude",
|
|
2119
|
+
"model": model,
|
|
2120
|
+
"experiment_type": use_case,
|
|
2121
|
+
"system_prompt": original_prompt,
|
|
2122
|
+
"max_tokens": max_tokens,
|
|
2123
|
+
"temperature": temperature,
|
|
2124
|
+
"parameters": {},
|
|
2125
|
+
}
|
|
2126
|
+
)
|
|
2127
|
+
|
|
2128
|
+
# Add temperature variations for original model
|
|
2129
|
+
if temperature != 0.0:
|
|
2130
|
+
experiments.append(
|
|
2131
|
+
{
|
|
2132
|
+
"name": f"{base_name}-Creative",
|
|
2133
|
+
"llm_type": "claude",
|
|
2134
|
+
"model": original_model,
|
|
2135
|
+
"experiment_type": use_case,
|
|
2136
|
+
"system_prompt": original_prompt,
|
|
2137
|
+
"max_tokens": max_tokens,
|
|
2138
|
+
"temperature": min(
|
|
2139
|
+
CREATIVE_TEMPERATURE_MAX,
|
|
2140
|
+
temperature + CREATIVE_TEMPERATURE_INCREMENT,
|
|
2141
|
+
),
|
|
2142
|
+
"parameters": {},
|
|
2143
|
+
}
|
|
2144
|
+
)
|
|
2145
|
+
|
|
2146
|
+
if temperature != 0.0:
|
|
2147
|
+
experiments.append(
|
|
2148
|
+
{
|
|
2149
|
+
"name": f"{base_name}-Deterministic",
|
|
2150
|
+
"llm_type": "claude",
|
|
2151
|
+
"model": original_model,
|
|
2152
|
+
"experiment_type": use_case,
|
|
2153
|
+
"system_prompt": original_prompt,
|
|
2154
|
+
"max_tokens": max_tokens,
|
|
2155
|
+
"temperature": 0.0,
|
|
2156
|
+
"parameters": {},
|
|
2157
|
+
}
|
|
2158
|
+
)
|
|
2159
|
+
|
|
2160
|
+
# Create configuration structure
|
|
2161
|
+
groundtruth_name = Path(groundtruth_file).stem
|
|
2162
|
+
config = {
|
|
2163
|
+
"description": f"Configuration generated from groundtruth metadata: {groundtruth_name}",
|
|
2164
|
+
"source_groundtruth": groundtruth_file,
|
|
2165
|
+
"generated_at": datetime.now().isoformat(),
|
|
2166
|
+
"original_metadata": metadata,
|
|
2167
|
+
"experiments": experiments,
|
|
2168
|
+
}
|
|
2169
|
+
|
|
2170
|
+
# Save configuration
|
|
2171
|
+
output_path = Path(output_file)
|
|
2172
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
2173
|
+
|
|
2174
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
|
2175
|
+
json.dump(config, f, indent=2, ensure_ascii=False)
|
|
2176
|
+
|
|
2177
|
+
self.log.info(
|
|
2178
|
+
f"Generated experiment configuration with {len(experiments)} experiments"
|
|
2179
|
+
)
|
|
2180
|
+
self.log.info(f"Configuration saved to: {output_path}")
|
|
2181
|
+
|
|
2182
|
+
return str(output_path)
|
|
2183
|
+
|
|
2184
|
+
except Exception as e:
|
|
2185
|
+
self.log.error(f"Error creating config from groundtruth: {e}")
|
|
2186
|
+
raise
|
|
2187
|
+
|
|
2188
|
+
|
|
2189
|
+
def main():
    """Command line interface for batch experiments.

    Parses arguments, handles the two config-generation shortcuts
    (``--create-sample-config`` and ``--create-config-from-groundtruth``),
    and otherwise runs the full batch-experiment pipeline, printing a
    summary of generated result files and suggested next steps.
    """
    import argparse

    def _detached_runner():
        # Build a runner without invoking __init__ — the config-generation
        # helpers need no experiment configuration file — and attach the
        # logger the instance methods expect.
        runner = BatchExperimentRunner.__new__(BatchExperimentRunner)
        runner.log = get_logger(__name__)
        return runner

    parser = argparse.ArgumentParser(
        description="Run batch experiments with different LLM configurations",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Create sample configuration file
  python -m gaia.eval.batch_experiment --create-sample-config experiment_config.json

  # Run batch experiments on transcript directory
  python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -o ./experiments

  # Run batch experiments on transcript directory with custom queries from groundtruth
  python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -q ./groundtruth/meeting.qa.groundtruth.json -o ./experiments

  # Run batch experiments on groundtruth file
  python -m gaia.eval.batch_experiment -c experiment_config.json -i ./groundtruth/transcript.qa.groundtruth.json -o ./experiments

  # Run batch experiments on consolidated groundtruth file
  python -m gaia.eval.batch_experiment -c experiment_config.json -i ./groundtruth/consolidated_summarization_groundtruth.json -o ./experiments

  # Run with custom delay between requests
  python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -o ./experiments --delay 2.0
""",
    )

    parser.add_argument(
        "-c", "--config", type=str, help="Path to experiment configuration JSON file"
    )
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        help="Path to input data: transcript file, directory of transcripts, or groundtruth JSON file",
    )
    parser.add_argument(
        "-q",
        "--queries-source",
        type=str,
        help="Path to groundtruth JSON file to extract queries from (for QA experiments on raw transcripts)",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        type=str,
        default="./experiments",
        help="Output directory for experiment results (default: ./experiments)",
    )
    parser.add_argument(
        "--delay",
        type=float,
        default=1.0,
        help="Delay in seconds between requests to avoid rate limiting (default: 1.0)",
    )
    parser.add_argument(
        "--create-sample-config",
        type=str,
        help="Create a sample configuration file at the specified path",
    )
    parser.add_argument(
        "--create-config-from-groundtruth",
        type=str,
        help="Create configuration from groundtruth file metadata (provide groundtruth file path)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force regeneration of all experiments, even if they already exist (default: skip existing)",
    )

    args = parser.parse_args()

    # Create sample config if requested
    if args.create_sample_config:
        runner = _detached_runner()
        runner.create_sample_config(args.create_sample_config)
        print(f"✅ Sample configuration created: {args.create_sample_config}")
        print("Edit this file to define your experiments, then run:")
        print(
            f"  python -m gaia.eval.batch_experiment -c {args.create_sample_config} -i <input_path> -o <output_dir>"
        )
        return

    # Create config from groundtruth if requested
    if args.create_config_from_groundtruth:
        # Derive the output filename from the groundtruth file's stem
        groundtruth_path = Path(args.create_config_from_groundtruth)
        default_output = f"{groundtruth_path.stem}.config.json"

        runner = _detached_runner()
        config_path = runner.create_config_from_groundtruth(
            args.create_config_from_groundtruth, default_output
        )
        print(f"✅ Configuration created from groundtruth metadata: {config_path}")
        print("Review and edit the configuration, then run:")
        print(
            f"  python -m gaia.eval.batch_experiment -c {config_path} -i <input_path> -o <output_dir>"
        )
        return

    # Validate required arguments (parser.error exits with status 2)
    if not args.config or not args.input:
        parser.error(
            "Both --config and --input are required (unless using --create-sample-config or --create-config-from-groundtruth)"
        )

    # Run batch experiments.
    # By default skip existing experiments, unless --force is specified
    runner = BatchExperimentRunner(args.config)
    skip_existing = not args.force
    result_files, skipped_count = runner.run_all_experiments(
        input_path=args.input,
        output_dir=args.output_dir,
        delay_seconds=args.delay,
        queries_source=args.queries_source,
        skip_existing=skip_existing,
    )

    # Report results with skip information
    if skipped_count > 0:
        new_count = len(result_files) - skipped_count
        print(
            f"✅ Completed {len(result_files)} experiments ({new_count} new, {skipped_count} skipped)"
        )
    else:
        print(f"✅ Completed {len(result_files)} experiments")

    print(f"   Results saved to: {args.output_dir}")
    print("   Generated files:")
    for result_file in result_files:
        print(f"     - {Path(result_file).name}")

    print("\nNext steps:")
    print("  1. Evaluate results using: gaia eval -f <result_file>")
    print(f"  2. Generate comparative report: gaia report -d {args.output_dir}")
|
|
2329
|
+
|
|
2330
|
+
|
|
2331
|
+
# Script entry point. `raise SystemExit(...)` replaces the `exit()` builtin:
# `exit` is injected by the `site` module for interactive use and is not
# guaranteed to exist (e.g. under `python -S`), while SystemExit propagates
# main()'s return value as the process exit status exactly the same way.
if __name__ == "__main__":
    raise SystemExit(main())
|