amd-gaia 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amd_gaia-0.14.1.dist-info/METADATA +768 -0
- amd_gaia-0.14.1.dist-info/RECORD +800 -0
- amd_gaia-0.14.1.dist-info/WHEEL +5 -0
- amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
- amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
- amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
- gaia/__init__.py +2 -0
- gaia/agents/__init__.py +19 -0
- gaia/agents/base/__init__.py +9 -0
- gaia/agents/base/agent.py +2072 -0
- gaia/agents/base/api_agent.py +120 -0
- gaia/agents/base/console.py +1457 -0
- gaia/agents/base/mcp_agent.py +86 -0
- gaia/agents/base/tools.py +83 -0
- gaia/agents/blender/agent.py +556 -0
- gaia/agents/blender/agent_simple.py +135 -0
- gaia/agents/blender/app.py +211 -0
- gaia/agents/blender/app_simple.py +41 -0
- gaia/agents/blender/core/__init__.py +16 -0
- gaia/agents/blender/core/materials.py +506 -0
- gaia/agents/blender/core/objects.py +316 -0
- gaia/agents/blender/core/rendering.py +225 -0
- gaia/agents/blender/core/scene.py +220 -0
- gaia/agents/blender/core/view.py +146 -0
- gaia/agents/chat/__init__.py +9 -0
- gaia/agents/chat/agent.py +975 -0
- gaia/agents/chat/app.py +1058 -0
- gaia/agents/chat/session.py +508 -0
- gaia/agents/chat/tools/__init__.py +15 -0
- gaia/agents/chat/tools/file_tools.py +96 -0
- gaia/agents/chat/tools/rag_tools.py +1729 -0
- gaia/agents/chat/tools/shell_tools.py +436 -0
- gaia/agents/code/__init__.py +7 -0
- gaia/agents/code/agent.py +547 -0
- gaia/agents/code/app.py +266 -0
- gaia/agents/code/models.py +135 -0
- gaia/agents/code/orchestration/__init__.py +24 -0
- gaia/agents/code/orchestration/checklist_executor.py +1739 -0
- gaia/agents/code/orchestration/checklist_generator.py +709 -0
- gaia/agents/code/orchestration/factories/__init__.py +9 -0
- gaia/agents/code/orchestration/factories/base.py +63 -0
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
- gaia/agents/code/orchestration/factories/python_factory.py +106 -0
- gaia/agents/code/orchestration/orchestrator.py +610 -0
- gaia/agents/code/orchestration/project_analyzer.py +391 -0
- gaia/agents/code/orchestration/steps/__init__.py +67 -0
- gaia/agents/code/orchestration/steps/base.py +188 -0
- gaia/agents/code/orchestration/steps/error_handler.py +314 -0
- gaia/agents/code/orchestration/steps/nextjs.py +828 -0
- gaia/agents/code/orchestration/steps/python.py +307 -0
- gaia/agents/code/orchestration/template_catalog.py +463 -0
- gaia/agents/code/orchestration/workflows/__init__.py +14 -0
- gaia/agents/code/orchestration/workflows/base.py +80 -0
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
- gaia/agents/code/orchestration/workflows/python.py +94 -0
- gaia/agents/code/prompts/__init__.py +11 -0
- gaia/agents/code/prompts/base_prompt.py +77 -0
- gaia/agents/code/prompts/code_patterns.py +1925 -0
- gaia/agents/code/prompts/nextjs_prompt.py +40 -0
- gaia/agents/code/prompts/python_prompt.py +109 -0
- gaia/agents/code/schema_inference.py +365 -0
- gaia/agents/code/system_prompt.py +41 -0
- gaia/agents/code/tools/__init__.py +42 -0
- gaia/agents/code/tools/cli_tools.py +1138 -0
- gaia/agents/code/tools/code_formatting.py +319 -0
- gaia/agents/code/tools/code_tools.py +769 -0
- gaia/agents/code/tools/error_fixing.py +1347 -0
- gaia/agents/code/tools/external_tools.py +180 -0
- gaia/agents/code/tools/file_io.py +845 -0
- gaia/agents/code/tools/prisma_tools.py +190 -0
- gaia/agents/code/tools/project_management.py +1016 -0
- gaia/agents/code/tools/testing.py +321 -0
- gaia/agents/code/tools/typescript_tools.py +122 -0
- gaia/agents/code/tools/validation_parsing.py +461 -0
- gaia/agents/code/tools/validation_tools.py +803 -0
- gaia/agents/code/tools/web_dev_tools.py +1744 -0
- gaia/agents/code/validators/__init__.py +16 -0
- gaia/agents/code/validators/antipattern_checker.py +241 -0
- gaia/agents/code/validators/ast_analyzer.py +197 -0
- gaia/agents/code/validators/requirements_validator.py +145 -0
- gaia/agents/code/validators/syntax_validator.py +171 -0
- gaia/agents/docker/__init__.py +7 -0
- gaia/agents/docker/agent.py +642 -0
- gaia/agents/jira/__init__.py +11 -0
- gaia/agents/jira/agent.py +894 -0
- gaia/agents/jira/jql_templates.py +299 -0
- gaia/agents/routing/__init__.py +7 -0
- gaia/agents/routing/agent.py +512 -0
- gaia/agents/routing/system_prompt.py +75 -0
- gaia/api/__init__.py +23 -0
- gaia/api/agent_registry.py +238 -0
- gaia/api/app.py +305 -0
- gaia/api/openai_server.py +575 -0
- gaia/api/schemas.py +186 -0
- gaia/api/sse_handler.py +370 -0
- gaia/apps/__init__.py +4 -0
- gaia/apps/llm/__init__.py +6 -0
- gaia/apps/llm/app.py +169 -0
- gaia/apps/summarize/app.py +633 -0
- gaia/apps/summarize/html_viewer.py +133 -0
- gaia/apps/summarize/pdf_formatter.py +284 -0
- gaia/audio/__init__.py +2 -0
- gaia/audio/audio_client.py +439 -0
- gaia/audio/audio_recorder.py +269 -0
- gaia/audio/kokoro_tts.py +599 -0
- gaia/audio/whisper_asr.py +432 -0
- gaia/chat/__init__.py +16 -0
- gaia/chat/app.py +430 -0
- gaia/chat/prompts.py +522 -0
- gaia/chat/sdk.py +1200 -0
- gaia/cli.py +5621 -0
- gaia/eval/batch_experiment.py +2332 -0
- gaia/eval/claude.py +542 -0
- gaia/eval/config.py +37 -0
- gaia/eval/email_generator.py +512 -0
- gaia/eval/eval.py +3179 -0
- gaia/eval/groundtruth.py +1130 -0
- gaia/eval/transcript_generator.py +582 -0
- gaia/eval/webapp/README.md +168 -0
- gaia/eval/webapp/node_modules/.bin/mime +16 -0
- gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
- gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
- gaia/eval/webapp/node_modules/.package-lock.json +865 -0
- gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
- gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
- gaia/eval/webapp/node_modules/accepts/README.md +140 -0
- gaia/eval/webapp/node_modules/accepts/index.js +238 -0
- gaia/eval/webapp/node_modules/accepts/package.json +47 -0
- gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
- gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
- gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
- gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
- gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
- gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
- gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
- gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
- gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
- gaia/eval/webapp/node_modules/bytes/History.md +97 -0
- gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
- gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
- gaia/eval/webapp/node_modules/bytes/index.js +170 -0
- gaia/eval/webapp/node_modules/bytes/package.json +42 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
- gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
- gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
- gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
- gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
- gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
- gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
- gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
- gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
- gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
- gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-type/README.md +94 -0
- gaia/eval/webapp/node_modules/content-type/index.js +225 -0
- gaia/eval/webapp/node_modules/content-type/package.json +42 -0
- gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
- gaia/eval/webapp/node_modules/cookie/README.md +317 -0
- gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/cookie/index.js +334 -0
- gaia/eval/webapp/node_modules/cookie/package.json +44 -0
- gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
- gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
- gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
- gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
- gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
- gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
- gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
- gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
- gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
- gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
- gaia/eval/webapp/node_modules/debug/Makefile +50 -0
- gaia/eval/webapp/node_modules/debug/README.md +312 -0
- gaia/eval/webapp/node_modules/debug/component.json +19 -0
- gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
- gaia/eval/webapp/node_modules/debug/node.js +1 -0
- gaia/eval/webapp/node_modules/debug/package.json +49 -0
- gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
- gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
- gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
- gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
- gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
- gaia/eval/webapp/node_modules/depd/History.md +103 -0
- gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
- gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
- gaia/eval/webapp/node_modules/depd/index.js +538 -0
- gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
- gaia/eval/webapp/node_modules/depd/package.json +45 -0
- gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
- gaia/eval/webapp/node_modules/destroy/README.md +63 -0
- gaia/eval/webapp/node_modules/destroy/index.js +209 -0
- gaia/eval/webapp/node_modules/destroy/package.json +48 -0
- gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
- gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
- gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
- gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
- gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
- gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
- gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
- gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
- gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
- gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
- gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
- gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
- gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
- gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
- gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
- gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
- gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
- gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
- gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
- gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
- gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
- gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
- gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
- gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
- gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
- gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
- gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
- gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
- gaia/eval/webapp/node_modules/etag/README.md +159 -0
- gaia/eval/webapp/node_modules/etag/index.js +131 -0
- gaia/eval/webapp/node_modules/etag/package.json +47 -0
- gaia/eval/webapp/node_modules/express/History.md +3656 -0
- gaia/eval/webapp/node_modules/express/LICENSE +24 -0
- gaia/eval/webapp/node_modules/express/Readme.md +260 -0
- gaia/eval/webapp/node_modules/express/index.js +11 -0
- gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
- gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
- gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
- gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
- gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
- gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
- gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
- gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
- gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
- gaia/eval/webapp/node_modules/express/package.json +102 -0
- gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
- gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
- gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
- gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
- gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
- gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
- gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
- gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
- gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
- gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
- gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
- gaia/eval/webapp/node_modules/fresh/README.md +119 -0
- gaia/eval/webapp/node_modules/fresh/index.js +137 -0
- gaia/eval/webapp/node_modules/fresh/package.json +46 -0
- gaia/eval/webapp/node_modules/fs/README.md +9 -0
- gaia/eval/webapp/node_modules/fs/package.json +20 -0
- gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
- gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
- gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
- gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
- gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
- gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
- gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
- gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
- gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
- gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
- gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
- gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
- gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
- gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
- gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
- gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
- gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
- gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
- gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
- gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
- gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
- gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
- gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
- gaia/eval/webapp/node_modules/gopd/README.md +40 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
- gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/gopd/index.js +15 -0
- gaia/eval/webapp/node_modules/gopd/package.json +77 -0
- gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
- gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
- gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
- gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
- gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
- gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
- gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
- gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
- gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
- gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
- gaia/eval/webapp/node_modules/hasown/README.md +40 -0
- gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/hasown/index.js +8 -0
- gaia/eval/webapp/node_modules/hasown/package.json +92 -0
- gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
- gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
- gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
- gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
- gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
- gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
- gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
- gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
- gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
- gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
- gaia/eval/webapp/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
- gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
- gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
- gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
- gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
- gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
- gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
- gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
- gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
- gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
- gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
- gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
- gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
- gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
- gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
- gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
- gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
- gaia/eval/webapp/node_modules/methods/README.md +51 -0
- gaia/eval/webapp/node_modules/methods/index.js +69 -0
- gaia/eval/webapp/node_modules/methods/package.json +36 -0
- gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
- gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
- gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
- gaia/eval/webapp/node_modules/mime/README.md +90 -0
- gaia/eval/webapp/node_modules/mime/cli.js +8 -0
- gaia/eval/webapp/node_modules/mime/mime.js +108 -0
- gaia/eval/webapp/node_modules/mime/package.json +44 -0
- gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
- gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
- gaia/eval/webapp/node_modules/mime/types.json +1 -0
- gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
- gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
- gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
- gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
- gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
- gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
- gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
- gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
- gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
- gaia/eval/webapp/node_modules/ms/index.js +152 -0
- gaia/eval/webapp/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/ms/package.json +37 -0
- gaia/eval/webapp/node_modules/ms/readme.md +51 -0
- gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
- gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
- gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
- gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
- gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
- gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
- gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
- gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
- gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
- gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
- gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
- gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
- gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
- gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
- gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
- gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
- gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
- gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
- gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
- gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
- gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
- gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
- gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
- gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
- gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
- gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
- gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
- gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
- gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
- gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
- gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
- gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
- gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
- gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
- gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
- gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
- gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
- gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
- gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
- gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
- gaia/eval/webapp/node_modules/path/.npmignore +1 -0
- gaia/eval/webapp/node_modules/path/LICENSE +18 -0
- gaia/eval/webapp/node_modules/path/README.md +15 -0
- gaia/eval/webapp/node_modules/path/package.json +24 -0
- gaia/eval/webapp/node_modules/path/path.js +628 -0
- gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
- gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
- gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
- gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
- gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/process/LICENSE +22 -0
- gaia/eval/webapp/node_modules/process/README.md +26 -0
- gaia/eval/webapp/node_modules/process/browser.js +184 -0
- gaia/eval/webapp/node_modules/process/index.js +2 -0
- gaia/eval/webapp/node_modules/process/package.json +27 -0
- gaia/eval/webapp/node_modules/process/test.js +199 -0
- gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
- gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
- gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
- gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
- gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
- gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
- gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
- gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
- gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
- gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
- gaia/eval/webapp/node_modules/qs/README.md +709 -0
- gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
- gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
- gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
- gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
- gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
- gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
- gaia/eval/webapp/node_modules/qs/package.json +91 -0
- gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
- gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
- gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
- gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
- gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
- gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
- gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
- gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
- gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
- gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
- gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
- gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
- gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
- gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
- gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
- gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
- gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
- gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
- gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
- gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
- gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
- gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
- gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
- gaia/eval/webapp/node_modules/send/LICENSE +23 -0
- gaia/eval/webapp/node_modules/send/README.md +327 -0
- gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/send/index.js +1142 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
- gaia/eval/webapp/node_modules/send/package.json +62 -0
- gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
- gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
- gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
- gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
- gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
- gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
- gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
- gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
- gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
- gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
- gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
- gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
- gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
- gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
- gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
- gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
- gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
- gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
- gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
- gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
- gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
- gaia/eval/webapp/node_modules/statuses/README.md +136 -0
- gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
- gaia/eval/webapp/node_modules/statuses/index.js +146 -0
- gaia/eval/webapp/node_modules/statuses/package.json +49 -0
- gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
- gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
- gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
- gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
- gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
- gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
- gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
- gaia/eval/webapp/node_modules/type-is/README.md +170 -0
- gaia/eval/webapp/node_modules/type-is/index.js +266 -0
- gaia/eval/webapp/node_modules/type-is/package.json +45 -0
- gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
- gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
- gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
- gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
- gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
- gaia/eval/webapp/node_modules/util/LICENSE +18 -0
- gaia/eval/webapp/node_modules/util/README.md +15 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/util/package.json +35 -0
- gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
- gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
- gaia/eval/webapp/node_modules/util/util.js +586 -0
- gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
- gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
- gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
- gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
- gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
- gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
- gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
- gaia/eval/webapp/node_modules/vary/README.md +101 -0
- gaia/eval/webapp/node_modules/vary/index.js +149 -0
- gaia/eval/webapp/node_modules/vary/package.json +43 -0
- gaia/eval/webapp/package-lock.json +875 -0
- gaia/eval/webapp/package.json +21 -0
- gaia/eval/webapp/public/app.js +3403 -0
- gaia/eval/webapp/public/index.html +88 -0
- gaia/eval/webapp/public/styles.css +3661 -0
- gaia/eval/webapp/server.js +416 -0
- gaia/eval/webapp/test-setup.js +73 -0
- gaia/llm/__init__.py +2 -0
- gaia/llm/lemonade_client.py +3083 -0
- gaia/llm/lemonade_manager.py +269 -0
- gaia/llm/llm_client.py +729 -0
- gaia/llm/vlm_client.py +307 -0
- gaia/logger.py +189 -0
- gaia/mcp/agent_mcp_server.py +245 -0
- gaia/mcp/blender_mcp_client.py +138 -0
- gaia/mcp/blender_mcp_server.py +648 -0
- gaia/mcp/context7_cache.py +332 -0
- gaia/mcp/external_services.py +518 -0
- gaia/mcp/mcp_bridge.py +550 -0
- gaia/mcp/servers/__init__.py +6 -0
- gaia/mcp/servers/docker_mcp.py +83 -0
- gaia/rag/__init__.py +10 -0
- gaia/rag/app.py +293 -0
- gaia/rag/demo.py +304 -0
- gaia/rag/pdf_utils.py +235 -0
- gaia/rag/sdk.py +2194 -0
- gaia/security.py +163 -0
- gaia/talk/app.py +289 -0
- gaia/talk/sdk.py +538 -0
- gaia/util.py +46 -0
- gaia/version.py +100 -0
|
@@ -0,0 +1,3403 @@
|
|
|
1
|
+
// Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
class EvaluationVisualizer {
|
|
5
|
+
constructor() {
|
|
6
|
+
console.log('EvaluationVisualizer constructor called');
|
|
7
|
+
this.loadedReports = new Map();
|
|
8
|
+
this.initializeEventListeners();
|
|
9
|
+
this.loadAvailableFiles();
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
// Helper method to identify main evaluation entries (skip individual meeting files)
|
|
13
|
+
isMainEvaluationEntry(evalData) {
|
|
14
|
+
const name = evalData.experiment_name || evalData.file_path || '';
|
|
15
|
+
|
|
16
|
+
// Skip entries that are individual meeting/email files (have test data prefix)
|
|
17
|
+
// Pattern: "testdata_name.Model-Config.experiment" where testdata_name contains _meeting or _email
|
|
18
|
+
const parts = name.split('.');
|
|
19
|
+
if (parts.length > 1) {
|
|
20
|
+
const prefix = parts[0];
|
|
21
|
+
// If prefix contains meeting/email patterns, it's an individual file
|
|
22
|
+
if (prefix.includes('_meeting') || prefix.includes('_email')) {
|
|
23
|
+
return false;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
// Check if file_path indicates it's in a subdirectory (meetings/ or emails/)
|
|
28
|
+
// Handle both forward slashes (Unix) and backslashes (Windows)
|
|
29
|
+
if (evalData.file_path && (evalData.file_path.includes('/') || evalData.file_path.includes('\\'))) {
|
|
30
|
+
return false; // It's an individual file in a subdirectory
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
return true;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
initializeEventListeners() {
|
|
37
|
+
const addBtn = document.getElementById('addReportBtn');
|
|
38
|
+
const compareBtn = document.getElementById('compareBtn');
|
|
39
|
+
const clearBtn = document.getElementById('clearBtn');
|
|
40
|
+
|
|
41
|
+
if (!addBtn || !compareBtn || !clearBtn) {
|
|
42
|
+
console.error('One or more buttons not found in DOM');
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
console.log('Adding event listeners to buttons');
|
|
47
|
+
addBtn.addEventListener('click', () => {
|
|
48
|
+
console.log('Add Report button clicked');
|
|
49
|
+
this.addSelectedReports();
|
|
50
|
+
});
|
|
51
|
+
compareBtn.addEventListener('click', () => {
|
|
52
|
+
console.log('Compare button clicked');
|
|
53
|
+
this.compareSelected();
|
|
54
|
+
});
|
|
55
|
+
clearBtn.addEventListener('click', () => {
|
|
56
|
+
console.log('Clear button clicked');
|
|
57
|
+
this.clearAllReports();
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
async loadAvailableFiles() {
|
|
62
|
+
try {
|
|
63
|
+
console.log('Loading available files...');
|
|
64
|
+
const [filesResponse, testDataResponse, groundtruthResponse] = await Promise.all([
|
|
65
|
+
fetch('/api/files'),
|
|
66
|
+
fetch('/api/test-data'),
|
|
67
|
+
fetch('/api/groundtruth')
|
|
68
|
+
]);
|
|
69
|
+
|
|
70
|
+
console.log('Responses received:', filesResponse.status, testDataResponse.status, groundtruthResponse.status);
|
|
71
|
+
|
|
72
|
+
if (!filesResponse.ok) {
|
|
73
|
+
throw new Error(`HTTP error! status: ${filesResponse.status}`);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const filesData = await filesResponse.json();
|
|
77
|
+
const testData = testDataResponse.ok ? await testDataResponse.json() : { directories: [] };
|
|
78
|
+
const groundtruthData = groundtruthResponse.ok ? await groundtruthResponse.json() : { files: [] };
|
|
79
|
+
|
|
80
|
+
console.log('Data received:', { files: filesData, testData, groundtruthData });
|
|
81
|
+
|
|
82
|
+
this.populateFileSelects({ ...filesData, testData, groundtruthData });
|
|
83
|
+
} catch (error) {
|
|
84
|
+
console.error('Failed to load available files:', error);
|
|
85
|
+
this.showError('Failed to load available files');
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
populateFileSelects(data) {
|
|
90
|
+
console.log('Populating file selects with data:', data);
|
|
91
|
+
const experimentSelect = document.getElementById('experimentSelect');
|
|
92
|
+
const evaluationSelect = document.getElementById('evaluationSelect');
|
|
93
|
+
const testDataSelect = document.getElementById('testDataSelect');
|
|
94
|
+
const groundtruthSelect = document.getElementById('groundtruthSelect');
|
|
95
|
+
const agentOutputSelect = document.getElementById('agentOutputSelect');
|
|
96
|
+
|
|
97
|
+
if (!experimentSelect || !evaluationSelect || !testDataSelect || !groundtruthSelect || !agentOutputSelect) {
|
|
98
|
+
console.error('Select elements not found in DOM');
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Clear existing options
|
|
103
|
+
experimentSelect.innerHTML = '';
|
|
104
|
+
evaluationSelect.innerHTML = '';
|
|
105
|
+
testDataSelect.innerHTML = '';
|
|
106
|
+
groundtruthSelect.innerHTML = '';
|
|
107
|
+
agentOutputSelect.innerHTML = '';
|
|
108
|
+
|
|
109
|
+
// Populate experiments
|
|
110
|
+
if (data.experiments.length === 0) {
|
|
111
|
+
experimentSelect.innerHTML = '<option disabled>No experiment files found</option>';
|
|
112
|
+
} else {
|
|
113
|
+
console.log(`Adding ${data.experiments.length} experiment files`);
|
|
114
|
+
data.experiments.forEach(file => {
|
|
115
|
+
const option = document.createElement('option');
|
|
116
|
+
option.value = file.name;
|
|
117
|
+
option.textContent = file.name.replace('.experiment.json', '');
|
|
118
|
+
option.title = file.name; // Add tooltip showing full filename
|
|
119
|
+
experimentSelect.appendChild(option);
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Populate evaluations
|
|
124
|
+
if (data.evaluations.length === 0) {
|
|
125
|
+
evaluationSelect.innerHTML = '<option disabled>No evaluation files found</option>';
|
|
126
|
+
} else {
|
|
127
|
+
console.log(`Adding ${data.evaluations.length} evaluation files`);
|
|
128
|
+
data.evaluations.forEach(file => {
|
|
129
|
+
const option = document.createElement('option');
|
|
130
|
+
option.value = file.name;
|
|
131
|
+
option.textContent = file.name.replace('.experiment.eval.json', '');
|
|
132
|
+
option.title = file.name; // Add tooltip showing full filename
|
|
133
|
+
evaluationSelect.appendChild(option);
|
|
134
|
+
});
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Display paths
|
|
138
|
+
if (data.paths) {
|
|
139
|
+
document.getElementById('testDataPath').textContent = data.paths.testData || '';
|
|
140
|
+
document.getElementById('groundtruthPath').textContent = data.paths.groundtruth || '';
|
|
141
|
+
document.getElementById('experimentsPath').textContent = data.paths.experiments || '';
|
|
142
|
+
document.getElementById('evaluationsPath').textContent = data.paths.evaluations || '';
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Populate test data
|
|
146
|
+
if (!data.testData || data.testData.directories.length === 0) {
|
|
147
|
+
testDataSelect.innerHTML = '<option disabled>No test data found</option>';
|
|
148
|
+
} else {
|
|
149
|
+
console.log(`Adding ${data.testData.directories.length} test data directories`);
|
|
150
|
+
data.testData.directories.forEach(dir => {
|
|
151
|
+
dir.files.forEach(file => {
|
|
152
|
+
const option = document.createElement('option');
|
|
153
|
+
const fullPath = `${dir.name}/${file}`;
|
|
154
|
+
option.value = fullPath;
|
|
155
|
+
// Remove 'test_data' prefix if present (when files are at root)
|
|
156
|
+
const displayName = dir.name === 'test_data' ? file.replace('.txt', '') : `${dir.name}/${file.replace('.txt', '')}`;
|
|
157
|
+
option.textContent = displayName;
|
|
158
|
+
option.title = fullPath; // Add tooltip showing full path
|
|
159
|
+
testDataSelect.appendChild(option);
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Populate groundtruth
|
|
165
|
+
if (!data.groundtruthData || data.groundtruthData.files.length === 0) {
|
|
166
|
+
groundtruthSelect.innerHTML = '<option disabled>No groundtruth files found</option>';
|
|
167
|
+
} else {
|
|
168
|
+
console.log(`Adding ${data.groundtruthData.files.length} groundtruth files`);
|
|
169
|
+
data.groundtruthData.files.forEach(file => {
|
|
170
|
+
const option = document.createElement('option');
|
|
171
|
+
option.value = file.path;
|
|
172
|
+
const displayName = file.name
|
|
173
|
+
.replace('.summarization.groundtruth.json', '')
|
|
174
|
+
.replace('.qa.groundtruth.json', '')
|
|
175
|
+
.replace('.groundtruth.json', '');
|
|
176
|
+
option.textContent = file.directory === 'root' ? displayName : `${file.directory}/${displayName}`;
|
|
177
|
+
if (file.type === 'consolidated') {
|
|
178
|
+
option.textContent += ' [Consolidated]';
|
|
179
|
+
}
|
|
180
|
+
option.title = file.path; // Add tooltip showing full path
|
|
181
|
+
groundtruthSelect.appendChild(option);
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
// Populate agent outputs
|
|
186
|
+
if (!data.agentOutputs || data.agentOutputs.length === 0) {
|
|
187
|
+
agentOutputSelect.innerHTML = '<option disabled>No agent outputs found</option>';
|
|
188
|
+
} else {
|
|
189
|
+
console.log(`Adding ${data.agentOutputs.length} agent output files`);
|
|
190
|
+
data.agentOutputs.forEach(file => {
|
|
191
|
+
const option = document.createElement('option');
|
|
192
|
+
option.value = file.name;
|
|
193
|
+
const displayName = file.name
|
|
194
|
+
.replace('agent_output_', '')
|
|
195
|
+
.replace('.json', '');
|
|
196
|
+
option.textContent = file.directory === 'single' ? `${displayName} [Single]` : displayName;
|
|
197
|
+
option.title = file.name; // Add tooltip showing full filename
|
|
198
|
+
agentOutputSelect.appendChild(option);
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
console.log('File selects populated successfully');
|
|
203
|
+
|
|
204
|
+
// Add double-click event listeners to enable direct file loading
|
|
205
|
+
this.addDoubleClickHandlers();
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
async addSelectedReports() {
|
|
209
|
+
console.log('addSelectedReports function called');
|
|
210
|
+
|
|
211
|
+
const experimentSelect = document.getElementById('experimentSelect');
|
|
212
|
+
const evaluationSelect = document.getElementById('evaluationSelect');
|
|
213
|
+
const testDataSelect = document.getElementById('testDataSelect');
|
|
214
|
+
const groundtruthSelect = document.getElementById('groundtruthSelect');
|
|
215
|
+
const agentOutputSelect = document.getElementById('agentOutputSelect');
|
|
216
|
+
|
|
217
|
+
if (!experimentSelect || !evaluationSelect || !testDataSelect || !groundtruthSelect || !agentOutputSelect) {
|
|
218
|
+
console.error('Select elements not found');
|
|
219
|
+
alert('Error: File selection elements not found');
|
|
220
|
+
return;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
const selectedExperiments = Array.from(experimentSelect.selectedOptions);
|
|
224
|
+
const selectedEvaluations = Array.from(evaluationSelect.selectedOptions);
|
|
225
|
+
const selectedTestData = Array.from(testDataSelect.selectedOptions);
|
|
226
|
+
const selectedGroundtruth = Array.from(groundtruthSelect.selectedOptions);
|
|
227
|
+
const selectedAgentOutputs = Array.from(agentOutputSelect.selectedOptions);
|
|
228
|
+
|
|
229
|
+
console.log('Selected experiments:', selectedExperiments.length);
|
|
230
|
+
console.log('Selected evaluations:', selectedEvaluations.length);
|
|
231
|
+
console.log('Selected test data:', selectedTestData.length);
|
|
232
|
+
console.log('Selected groundtruth:', selectedGroundtruth.length);
|
|
233
|
+
console.log('Selected agent outputs:', selectedAgentOutputs.length);
|
|
234
|
+
|
|
235
|
+
if (selectedExperiments.length === 0 && selectedEvaluations.length === 0 &&
|
|
236
|
+
selectedTestData.length === 0 && selectedGroundtruth.length === 0 &&
|
|
237
|
+
selectedAgentOutputs.length === 0) {
|
|
238
|
+
alert('Please select at least one file to load');
|
|
239
|
+
return;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// Load selected experiments
|
|
243
|
+
for (const option of selectedExperiments) {
|
|
244
|
+
await this.loadExperiment(option.value);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Load selected evaluations
|
|
248
|
+
for (const option of selectedEvaluations) {
|
|
249
|
+
await this.loadEvaluation(option.value);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// Load selected test data
|
|
253
|
+
for (const option of selectedTestData) {
|
|
254
|
+
await this.loadTestData(option.value);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// Load selected groundtruth
|
|
258
|
+
for (const option of selectedGroundtruth) {
|
|
259
|
+
await this.loadGroundtruth(option.value);
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// Load selected agent outputs
|
|
263
|
+
for (const option of selectedAgentOutputs) {
|
|
264
|
+
await this.loadAgentOutput(option.value);
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// Clear selections
|
|
268
|
+
experimentSelect.selectedIndex = -1;
|
|
269
|
+
evaluationSelect.selectedIndex = -1;
|
|
270
|
+
testDataSelect.selectedIndex = -1;
|
|
271
|
+
groundtruthSelect.selectedIndex = -1;
|
|
272
|
+
agentOutputSelect.selectedIndex = -1;
|
|
273
|
+
|
|
274
|
+
this.updateDisplay();
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
addDoubleClickHandlers() {
|
|
278
|
+
const experimentSelect = document.getElementById('experimentSelect');
|
|
279
|
+
const evaluationSelect = document.getElementById('evaluationSelect');
|
|
280
|
+
const testDataSelect = document.getElementById('testDataSelect');
|
|
281
|
+
const groundtruthSelect = document.getElementById('groundtruthSelect');
|
|
282
|
+
const agentOutputSelect = document.getElementById('agentOutputSelect');
|
|
283
|
+
|
|
284
|
+
if (experimentSelect) {
|
|
285
|
+
experimentSelect.addEventListener('dblclick', (e) => {
|
|
286
|
+
if (e.target.tagName === 'OPTION' && !e.target.disabled) {
|
|
287
|
+
this.addSingleReport('experiment', e.target.value);
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
if (evaluationSelect) {
|
|
293
|
+
evaluationSelect.addEventListener('dblclick', (e) => {
|
|
294
|
+
if (e.target.tagName === 'OPTION' && !e.target.disabled) {
|
|
295
|
+
this.addSingleReport('evaluation', e.target.value);
|
|
296
|
+
}
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
if (testDataSelect) {
|
|
301
|
+
testDataSelect.addEventListener('dblclick', (e) => {
|
|
302
|
+
if (e.target.tagName === 'OPTION' && !e.target.disabled) {
|
|
303
|
+
this.addSingleReport('testData', e.target.value);
|
|
304
|
+
}
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
if (groundtruthSelect) {
|
|
309
|
+
groundtruthSelect.addEventListener('dblclick', (e) => {
|
|
310
|
+
if (e.target.tagName === 'OPTION' && !e.target.disabled) {
|
|
311
|
+
this.addSingleReport('groundtruth', e.target.value);
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
if (agentOutputSelect) {
|
|
317
|
+
agentOutputSelect.addEventListener('dblclick', (e) => {
|
|
318
|
+
if (e.target.tagName === 'OPTION' && !e.target.disabled) {
|
|
319
|
+
this.addSingleReport('agentOutput', e.target.value);
|
|
320
|
+
}
|
|
321
|
+
});
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
console.log('Double-click handlers added to all select elements');
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
async addSingleReport(type, filename) {
|
|
328
|
+
console.log(`Adding single ${type} report: ${filename}`);
|
|
329
|
+
|
|
330
|
+
try {
|
|
331
|
+
switch (type) {
|
|
332
|
+
case 'experiment':
|
|
333
|
+
await this.loadExperiment(filename);
|
|
334
|
+
break;
|
|
335
|
+
case 'evaluation':
|
|
336
|
+
await this.loadEvaluation(filename);
|
|
337
|
+
break;
|
|
338
|
+
case 'testData':
|
|
339
|
+
await this.loadTestData(filename);
|
|
340
|
+
break;
|
|
341
|
+
case 'groundtruth':
|
|
342
|
+
await this.loadGroundtruth(filename);
|
|
343
|
+
break;
|
|
344
|
+
case 'agentOutput':
|
|
345
|
+
await this.loadAgentOutput(filename);
|
|
346
|
+
break;
|
|
347
|
+
default:
|
|
348
|
+
console.error(`Unknown report type: ${type}`);
|
|
349
|
+
return;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
this.updateDisplay();
|
|
353
|
+
console.log(`Successfully added ${type} report: ${filename}`);
|
|
354
|
+
} catch (error) {
|
|
355
|
+
console.error(`Failed to add ${type} report:`, error);
|
|
356
|
+
alert(`Failed to load ${type} report: ${filename}`);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
async loadExperiment(filename) {
|
|
361
|
+
try {
|
|
362
|
+
const response = await fetch(`/api/experiment/${filename}`);
|
|
363
|
+
const data = await response.json();
|
|
364
|
+
|
|
365
|
+
const reportId = filename.replace('.experiment.json', '');
|
|
366
|
+
this.loadedReports.set(reportId, {
|
|
367
|
+
...this.loadedReports.get(reportId),
|
|
368
|
+
experiment: data,
|
|
369
|
+
filename: filename,
|
|
370
|
+
type: 'experiment'
|
|
371
|
+
});
|
|
372
|
+
} catch (error) {
|
|
373
|
+
console.error(`Failed to load experiment ${filename}:`, error);
|
|
374
|
+
this.showError(`Failed to load experiment ${filename}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
async loadEvaluation(filename) {
|
|
379
|
+
try {
|
|
380
|
+
const response = await fetch(`/api/evaluation/${filename}`);
|
|
381
|
+
const data = await response.json();
|
|
382
|
+
|
|
383
|
+
// Check if this is a consolidated evaluation report
|
|
384
|
+
if (data.metadata && data.metadata.report_type === 'consolidated_evaluations') {
|
|
385
|
+
// No need to load experiment files - consolidated report has all the data
|
|
386
|
+
const reportId = filename.replace('.json', '');
|
|
387
|
+
this.loadedReports.set(reportId, {
|
|
388
|
+
consolidatedEvaluation: data,
|
|
389
|
+
filename: filename,
|
|
390
|
+
type: 'consolidated_evaluation'
|
|
391
|
+
});
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// For individual evaluation files
|
|
396
|
+
const reportId = filename.replace('.experiment.eval.json', '');
|
|
397
|
+
this.loadedReports.set(reportId, {
|
|
398
|
+
...this.loadedReports.get(reportId),
|
|
399
|
+
evaluation: data,
|
|
400
|
+
filename: filename,
|
|
401
|
+
type: 'evaluation'
|
|
402
|
+
});
|
|
403
|
+
} catch (error) {
|
|
404
|
+
console.error(`Failed to load evaluation ${filename}:`, error);
|
|
405
|
+
this.showError(`Failed to load evaluation ${filename}`);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
async loadTestData(fileSpec) {
|
|
410
|
+
try {
|
|
411
|
+
const [type, filename] = fileSpec.split('/');
|
|
412
|
+
const [contentResponse, metadataResponse] = await Promise.all([
|
|
413
|
+
fetch(`/api/test-data/${type}/${filename}`),
|
|
414
|
+
fetch(`/api/test-data/${type}/metadata`)
|
|
415
|
+
]);
|
|
416
|
+
|
|
417
|
+
if (!contentResponse.ok) {
|
|
418
|
+
throw new Error(`Failed to load test data: ${contentResponse.status}`);
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
const contentData = await contentResponse.json();
|
|
422
|
+
const metadataData = metadataResponse.ok ? await metadataResponse.json() : null;
|
|
423
|
+
|
|
424
|
+
const reportId = `testdata-${type}-${filename.replace('.txt', '')}`;
|
|
425
|
+
this.loadedReports.set(reportId, {
|
|
426
|
+
testData: {
|
|
427
|
+
content: contentData.content, // Extract just the content string
|
|
428
|
+
metadata: metadataData,
|
|
429
|
+
type: type,
|
|
430
|
+
filename: filename,
|
|
431
|
+
isPdf: contentData.isPdf, // Pass through PDF flag
|
|
432
|
+
message: contentData.message // Pass through any message
|
|
433
|
+
},
|
|
434
|
+
filename: fileSpec,
|
|
435
|
+
type: 'testdata'
|
|
436
|
+
});
|
|
437
|
+
} catch (error) {
|
|
438
|
+
console.error(`Failed to load test data ${fileSpec}:`, error);
|
|
439
|
+
this.showError(`Failed to load test data ${fileSpec}`);
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
async loadGroundtruth(filename) {
|
|
444
|
+
try {
|
|
445
|
+
const response = await fetch(`/api/groundtruth/${filename}`);
|
|
446
|
+
|
|
447
|
+
if (!response.ok) {
|
|
448
|
+
throw new Error(`Failed to load groundtruth: ${response.status}`);
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
const data = await response.json();
|
|
452
|
+
|
|
453
|
+
const reportId = `groundtruth-${filename.replace(/\.(summarization|qa)\.groundtruth\.json$/, '').replace(/\//g, '-')}`;
|
|
454
|
+
this.loadedReports.set(reportId, {
|
|
455
|
+
groundtruth: data,
|
|
456
|
+
filename: filename,
|
|
457
|
+
type: 'groundtruth'
|
|
458
|
+
});
|
|
459
|
+
} catch (error) {
|
|
460
|
+
console.error(`Failed to load groundtruth ${filename}:`, error);
|
|
461
|
+
this.showError(`Failed to load groundtruth ${filename}`);
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
async loadAgentOutput(filename) {
|
|
466
|
+
try {
|
|
467
|
+
const response = await fetch(`/api/agent-output/${filename}`);
|
|
468
|
+
|
|
469
|
+
if (!response.ok) {
|
|
470
|
+
throw new Error(`Failed to load agent output: ${response.status}`);
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
const data = await response.json();
|
|
474
|
+
|
|
475
|
+
const reportId = `agent-${filename.replace('agent_output_', '').replace('.json', '')}`;
|
|
476
|
+
this.loadedReports.set(reportId, {
|
|
477
|
+
agentOutput: data,
|
|
478
|
+
filename: filename,
|
|
479
|
+
type: 'agent_output'
|
|
480
|
+
});
|
|
481
|
+
} catch (error) {
|
|
482
|
+
console.error(`Failed to load agent output ${filename}:`, error);
|
|
483
|
+
this.showError(`Failed to load agent output ${filename}`);
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
updateDisplay() {
|
|
488
|
+
const reportsGrid = document.getElementById('reportsGrid');
|
|
489
|
+
|
|
490
|
+
if (this.loadedReports.size === 0) {
|
|
491
|
+
reportsGrid.innerHTML = `
|
|
492
|
+
<div class="empty-state">
|
|
493
|
+
<h3>No reports loaded</h3>
|
|
494
|
+
<p>Select experiment, evaluation, test data, and/or groundtruth files to visualize results</p>
|
|
495
|
+
</div>
|
|
496
|
+
`;
|
|
497
|
+
return;
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
let html = '';
|
|
501
|
+
this.loadedReports.forEach((report, reportId) => {
|
|
502
|
+
html += this.generateReportCard(reportId, report);
|
|
503
|
+
});
|
|
504
|
+
|
|
505
|
+
reportsGrid.innerHTML = html;
|
|
506
|
+
|
|
507
|
+
// Add event listeners for close buttons
|
|
508
|
+
document.querySelectorAll('.report-close').forEach(btn => {
|
|
509
|
+
btn.addEventListener('click', (e) => {
|
|
510
|
+
const reportId = e.target.dataset.reportId;
|
|
511
|
+
this.removeReport(reportId);
|
|
512
|
+
});
|
|
513
|
+
});
|
|
514
|
+
|
|
515
|
+
// Add event listeners for collapsible sections
|
|
516
|
+
document.querySelectorAll('.collapsible-header').forEach(header => {
|
|
517
|
+
header.addEventListener('click', (e) => {
|
|
518
|
+
// Don't toggle if clicking on the view source button
|
|
519
|
+
if (e.target.classList.contains('view-source-btn')) {
|
|
520
|
+
return;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
const section = e.currentTarget.parentElement;
|
|
524
|
+
const content = section.querySelector('.collapsible-content');
|
|
525
|
+
const toggle = section.querySelector('.collapsible-toggle');
|
|
526
|
+
|
|
527
|
+
if (content.classList.contains('expanded')) {
|
|
528
|
+
content.classList.remove('expanded');
|
|
529
|
+
toggle.classList.remove('expanded');
|
|
530
|
+
toggle.textContent = '▶';
|
|
531
|
+
} else {
|
|
532
|
+
content.classList.add('expanded');
|
|
533
|
+
toggle.classList.add('expanded');
|
|
534
|
+
toggle.textContent = '▼';
|
|
535
|
+
}
|
|
536
|
+
});
|
|
537
|
+
});
|
|
538
|
+
|
|
539
|
+
// Add event listeners for view source buttons
|
|
540
|
+
document.querySelectorAll('.view-source-btn').forEach(btn => {
|
|
541
|
+
btn.addEventListener('click', (e) => {
|
|
542
|
+
e.stopPropagation(); // Prevent collapsible toggle
|
|
543
|
+
const sourcePath = e.target.dataset.sourcePath;
|
|
544
|
+
this.viewSourceFile(sourcePath);
|
|
545
|
+
});
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
// Add event listeners for export dropdowns
|
|
549
|
+
document.querySelectorAll('.export-btn').forEach(btn => {
|
|
550
|
+
btn.addEventListener('click', (e) => {
|
|
551
|
+
e.stopPropagation();
|
|
552
|
+
const dropdown = btn.closest('.export-dropdown');
|
|
553
|
+
const menu = dropdown.querySelector('.export-menu');
|
|
554
|
+
|
|
555
|
+
// Close all other open menus
|
|
556
|
+
document.querySelectorAll('.export-menu.show').forEach(m => {
|
|
557
|
+
if (m !== menu) m.classList.remove('show');
|
|
558
|
+
});
|
|
559
|
+
|
|
560
|
+
menu.classList.toggle('show');
|
|
561
|
+
});
|
|
562
|
+
});
|
|
563
|
+
|
|
564
|
+
// Add event listeners for export options
|
|
565
|
+
document.querySelectorAll('.export-option').forEach(btn => {
|
|
566
|
+
btn.addEventListener('click', (e) => {
|
|
567
|
+
e.stopPropagation();
|
|
568
|
+
const format = e.target.dataset.format;
|
|
569
|
+
const reportId = e.target.dataset.reportId;
|
|
570
|
+
const menu = e.target.closest('.export-menu');
|
|
571
|
+
menu.classList.remove('show');
|
|
572
|
+
|
|
573
|
+
if (format === 'png') {
|
|
574
|
+
this.exportReportAsPNG(reportId);
|
|
575
|
+
} else if (format === 'pdf') {
|
|
576
|
+
this.exportReportAsPDF(reportId);
|
|
577
|
+
}
|
|
578
|
+
});
|
|
579
|
+
});
|
|
580
|
+
|
|
581
|
+
// Close export menus when clicking elsewhere
|
|
582
|
+
document.addEventListener('click', () => {
|
|
583
|
+
document.querySelectorAll('.export-menu.show').forEach(menu => {
|
|
584
|
+
menu.classList.remove('show');
|
|
585
|
+
});
|
|
586
|
+
});
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
generateReportCard(reportId, report) {
|
|
590
|
+
const hasExperiment = report.experiment !== undefined;
|
|
591
|
+
const hasEvaluation = report.evaluation !== undefined;
|
|
592
|
+
const hasTestData = report.testData !== undefined;
|
|
593
|
+
const hasGroundtruth = report.groundtruth !== undefined;
|
|
594
|
+
const hasAgentOutput = report.agentOutput !== undefined;
|
|
595
|
+
const hasConsolidatedEvaluation = report.consolidatedEvaluation !== undefined;
|
|
596
|
+
|
|
597
|
+
// Handle consolidated evaluation reports separately
|
|
598
|
+
if (hasConsolidatedEvaluation) {
|
|
599
|
+
return this.generateConsolidatedReportCard(reportId, report.consolidatedEvaluation, report.filename);
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// Handle agent outputs separately
|
|
603
|
+
if (hasAgentOutput) {
|
|
604
|
+
return this.generateAgentOutputReportCard(reportId, report.agentOutput, report.filename);
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
let title = reportId;
|
|
608
|
+
let subtitle = '';
|
|
609
|
+
let fullPath = report.filename || reportId; // Use filename if available, otherwise reportId
|
|
610
|
+
|
|
611
|
+
if (hasGroundtruth) {
|
|
612
|
+
const gtFile = report.filename;
|
|
613
|
+
title = gtFile.replace(/\.(summarization|qa)\.groundtruth\.json$/, '').replace(/\//g, '/');
|
|
614
|
+
subtitle = 'Groundtruth';
|
|
615
|
+
if (gtFile.includes('consolidated')) {
|
|
616
|
+
subtitle += ' [Consolidated]';
|
|
617
|
+
}
|
|
618
|
+
} else if (hasTestData) {
|
|
619
|
+
title = `${report.testData.type}/${report.testData.filename.replace('.txt', '')}`;
|
|
620
|
+
subtitle = 'Test Data';
|
|
621
|
+
fullPath = `${report.testData.type}/${report.testData.filename}`; // Full path for test data
|
|
622
|
+
} else if (hasExperiment && hasEvaluation) {
|
|
623
|
+
subtitle = 'Experiment + Evaluation';
|
|
624
|
+
} else if (hasExperiment) {
|
|
625
|
+
subtitle = 'Experiment Only';
|
|
626
|
+
} else if (hasEvaluation) {
|
|
627
|
+
subtitle = 'Evaluation Only';
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
return `
|
|
631
|
+
<div class="report-card" data-report-id="${reportId}">
|
|
632
|
+
<div class="report-header">
|
|
633
|
+
<h3 title="${fullPath}">${title}</h3>
|
|
634
|
+
<div class="meta">${subtitle}</div>
|
|
635
|
+
<div class="report-actions">
|
|
636
|
+
<div class="export-dropdown">
|
|
637
|
+
<button class="export-btn" title="Export report">
|
|
638
|
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
|
639
|
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
|
640
|
+
<polyline points="7 10 12 15 17 10"/>
|
|
641
|
+
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
642
|
+
</svg>
|
|
643
|
+
</button>
|
|
644
|
+
<div class="export-menu">
|
|
645
|
+
<button class="export-option" data-format="png" data-report-id="${reportId}">📷 Export as PNG</button>
|
|
646
|
+
<button class="export-option" data-format="pdf" data-report-id="${reportId}">📄 Export as PDF</button>
|
|
647
|
+
</div>
|
|
648
|
+
</div>
|
|
649
|
+
<button class="report-close" data-report-id="${reportId}">×</button>
|
|
650
|
+
</div>
|
|
651
|
+
</div>
|
|
652
|
+
<div class="report-content">
|
|
653
|
+
${hasGroundtruth ? this.generateGroundtruthSection(report.groundtruth) :
|
|
654
|
+
hasTestData ? this.generateTestDataSection(report.testData) : this.generateMetricsSection(report)}
|
|
655
|
+
${hasEvaluation ? this.generateEvaluationSummary(report.evaluation) : ''}
|
|
656
|
+
${hasEvaluation ? this.generateQualitySection(report.evaluation) : ''}
|
|
657
|
+
${this.generateCostBreakdownSection(report)}
|
|
658
|
+
${this.generateTimingSection(report)}
|
|
659
|
+
${hasExperiment ? this.generateExperimentDetails(report.experiment) : ''}
|
|
660
|
+
${hasExperiment ? this.generateExperimentSummaries(report.experiment) : ''}
|
|
661
|
+
</div>
|
|
662
|
+
</div>
|
|
663
|
+
`;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
generateAgentOutputReportCard(reportId, agentData, filename) {
|
|
667
|
+
const metadata = agentData.metadata || {};
|
|
668
|
+
const conversation = agentData.conversation || [];
|
|
669
|
+
const systemPrompt = agentData.system_prompt || '';
|
|
670
|
+
const systemPromptTokens = agentData.system_prompt_tokens || null;
|
|
671
|
+
|
|
672
|
+
// Extract performance metrics from conversation
|
|
673
|
+
const performanceStats = [];
|
|
674
|
+
let totalInputTokens = 0;
|
|
675
|
+
let totalOutputTokens = 0;
|
|
676
|
+
let avgTokensPerSecond = 0;
|
|
677
|
+
let avgTimeToFirstToken = 0;
|
|
678
|
+
let stepCount = 0;
|
|
679
|
+
|
|
680
|
+
conversation.forEach(msg => {
|
|
681
|
+
if (msg.role === 'system' && msg.content?.type === 'stats' && msg.content.performance_stats) {
|
|
682
|
+
const stats = msg.content.performance_stats;
|
|
683
|
+
performanceStats.push(stats);
|
|
684
|
+
totalInputTokens += stats.input_tokens || 0;
|
|
685
|
+
totalOutputTokens += stats.output_tokens || 0;
|
|
686
|
+
if (stats.tokens_per_second) avgTokensPerSecond += stats.tokens_per_second;
|
|
687
|
+
if (stats.time_to_first_token) avgTimeToFirstToken += stats.time_to_first_token;
|
|
688
|
+
stepCount++;
|
|
689
|
+
}
|
|
690
|
+
});
|
|
691
|
+
|
|
692
|
+
if (stepCount > 0) {
|
|
693
|
+
avgTokensPerSecond /= stepCount;
|
|
694
|
+
avgTimeToFirstToken /= stepCount;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Extract tool calls
|
|
698
|
+
const toolCalls = [];
|
|
699
|
+
conversation.forEach(msg => {
|
|
700
|
+
if (msg.role === 'assistant' && msg.content?.tool) {
|
|
701
|
+
toolCalls.push({
|
|
702
|
+
tool: msg.content.tool,
|
|
703
|
+
args: msg.content.tool_args,
|
|
704
|
+
thought: msg.content.thought,
|
|
705
|
+
goal: msg.content.goal
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
});
|
|
709
|
+
|
|
710
|
+
// Generate summary
|
|
711
|
+
const summary = {
|
|
712
|
+
status: agentData.status || 'unknown',
|
|
713
|
+
result: agentData.result || 'N/A',
|
|
714
|
+
steps_taken: agentData.steps_taken || 0,
|
|
715
|
+
error_count: agentData.error_count || 0,
|
|
716
|
+
conversation_length: conversation.length,
|
|
717
|
+
tool_calls_count: toolCalls.length,
|
|
718
|
+
has_performance_stats: performanceStats.length > 0
|
|
719
|
+
};
|
|
720
|
+
|
|
721
|
+
const displayName = filename?.replace('agent_output_', '').replace('.json', '') || reportId;
|
|
722
|
+
|
|
723
|
+
return `
|
|
724
|
+
<div class="report-card agent-output" data-report-id="${reportId}">
|
|
725
|
+
<div class="report-header">
|
|
726
|
+
<h3 title="${filename || 'N/A'}">${displayName}</h3>
|
|
727
|
+
<div class="meta">Agent Output Analysis</div>
|
|
728
|
+
<div class="report-actions">
|
|
729
|
+
<div class="export-dropdown">
|
|
730
|
+
<button class="export-btn" title="Export report">
|
|
731
|
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
|
732
|
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
|
733
|
+
<polyline points="7 10 12 15 17 10"/>
|
|
734
|
+
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
735
|
+
</svg>
|
|
736
|
+
</button>
|
|
737
|
+
<div class="export-menu">
|
|
738
|
+
<button class="export-option" data-format="png" data-report-id="${reportId}">📷 Export as PNG</button>
|
|
739
|
+
<button class="export-option" data-format="pdf" data-report-id="${reportId}">📄 Export as PDF</button>
|
|
740
|
+
</div>
|
|
741
|
+
</div>
|
|
742
|
+
<button class="report-close" data-report-id="${reportId}">×</button>
|
|
743
|
+
</div>
|
|
744
|
+
</div>
|
|
745
|
+
<div class="report-content">
|
|
746
|
+
${this.generateAgentSummarySection(summary)}
|
|
747
|
+
${this.generateConversationFlowSection(conversation)}
|
|
748
|
+
${this.generatePerformanceMetricsSection(performanceStats, totalInputTokens, totalOutputTokens, avgTokensPerSecond, avgTimeToFirstToken)}
|
|
749
|
+
${this.generateToolExecutionSection(toolCalls)}
|
|
750
|
+
${this.generateSystemPromptSection(systemPrompt, systemPromptTokens)}
|
|
751
|
+
</div>
|
|
752
|
+
</div>
|
|
753
|
+
`;
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
generateMetricsSection(report) {
|
|
757
|
+
const metrics = [];
|
|
758
|
+
|
|
759
|
+
if (report.experiment) {
|
|
760
|
+
const exp = report.experiment;
|
|
761
|
+
const isLocal = exp.metadata.inference_type === 'local' || exp.metadata.llm_type?.toLowerCase() === 'lemonade';
|
|
762
|
+
const totalCost = exp.metadata.total_cost?.total_cost || 0;
|
|
763
|
+
|
|
764
|
+
// Add inference type indicator
|
|
765
|
+
metrics.push({
|
|
766
|
+
label: '<span data-tooltip="Whether the model runs locally or remotely">Inference</span>',
|
|
767
|
+
value: isLocal ?
|
|
768
|
+
'<span style="color: #28a745; font-weight: bold;" data-tooltip="Running on your local machine">🖥️ Local</span>' :
|
|
769
|
+
'<span style="color: #007bff; font-weight: bold;" data-tooltip="Running on cloud servers">☁️ Cloud</span>'
|
|
770
|
+
});
|
|
771
|
+
|
|
772
|
+
// Show cost with special formatting for local (free) inference
|
|
773
|
+
metrics.push({
|
|
774
|
+
label: '<span data-tooltip="Cost of generating summaries (not evaluation cost)">Total Cost</span>',
|
|
775
|
+
value: isLocal ?
|
|
776
|
+
'<span style="color: #28a745; font-weight: bold;" data-tooltip="No cost for local models">FREE</span>' :
|
|
777
|
+
`<span data-tooltip="API usage cost">$${totalCost.toFixed(4)}</span>`
|
|
778
|
+
});
|
|
779
|
+
|
|
780
|
+
metrics.push(
|
|
781
|
+
{ label: '<span data-tooltip="Total tokens processed (input + output)">Total Tokens</span>', value: exp.metadata.total_usage?.total_tokens?.toLocaleString() || 'N/A' },
|
|
782
|
+
{ label: '<span data-tooltip="Number of test cases processed">Items</span>', value: exp.metadata.total_items || 0 }
|
|
783
|
+
);
|
|
784
|
+
|
|
785
|
+
// Add experiment timing metrics
|
|
786
|
+
if (exp.metadata.timing) {
|
|
787
|
+
const timing = exp.metadata.timing;
|
|
788
|
+
if (timing.total_experiment_time_seconds) {
|
|
789
|
+
metrics.push({ label: 'Total Time', value: this.formatTime(timing.total_experiment_time_seconds) });
|
|
790
|
+
}
|
|
791
|
+
if (timing.average_per_item_seconds) {
|
|
792
|
+
metrics.push({ label: 'Avg/Item', value: this.formatTime(timing.average_per_item_seconds) });
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
if (report.evaluation) {
|
|
798
|
+
const evalData = report.evaluation;
|
|
799
|
+
const metrics_data = evalData.overall_rating?.metrics;
|
|
800
|
+
if (metrics_data) {
|
|
801
|
+
// Check if this is a Q&A evaluation (has accuracy_percentage) or summarization (has quality_score)
|
|
802
|
+
if (metrics_data.accuracy_percentage !== undefined) {
|
|
803
|
+
// Q&A evaluation metrics
|
|
804
|
+
metrics.push(
|
|
805
|
+
{ label: 'Accuracy', value: `${metrics_data.accuracy_percentage}%` },
|
|
806
|
+
{ label: 'Pass Rate', value: `${(metrics_data.pass_rate * 100).toFixed(1)}%` },
|
|
807
|
+
{ label: 'Questions', value: metrics_data.num_questions || 0 },
|
|
808
|
+
{ label: 'Passed', value: metrics_data.num_passed || 0 },
|
|
809
|
+
{ label: 'Failed', value: metrics_data.num_failed || 0 }
|
|
810
|
+
);
|
|
811
|
+
} else if (metrics_data.quality_score !== undefined) {
|
|
812
|
+
// Summarization evaluation metrics
|
|
813
|
+
metrics.push(
|
|
814
|
+
{ label: 'Grade', value: this.formatQualityScore(metrics_data.quality_score) },
|
|
815
|
+
{ label: 'Excellent', value: metrics_data.excellent_count || 0 },
|
|
816
|
+
{ label: 'Good', value: metrics_data.good_count || 0 },
|
|
817
|
+
{ label: 'Fair', value: metrics_data.fair_count || 0 },
|
|
818
|
+
{ label: 'Poor', value: metrics_data.poor_count || 0 }
|
|
819
|
+
);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
// Add evaluation cost and usage metrics
|
|
824
|
+
if (evalData.total_cost) {
|
|
825
|
+
metrics.push(
|
|
826
|
+
{ label: 'Eval Cost', value: `$${evalData.total_cost.total_cost?.toFixed(4) || 'N/A'}` }
|
|
827
|
+
);
|
|
828
|
+
}
|
|
829
|
+
if (evalData.total_usage) {
|
|
830
|
+
metrics.push(
|
|
831
|
+
{ label: 'Eval Tokens', value: evalData.total_usage.total_tokens?.toLocaleString() || 'N/A' }
|
|
832
|
+
);
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
// Add evaluation timing metrics
|
|
836
|
+
if (evalData.timing) {
|
|
837
|
+
const timing = evalData.timing;
|
|
838
|
+
if (timing.total_processing_time_seconds) {
|
|
839
|
+
metrics.push({ label: 'Eval Time', value: this.formatTime(timing.total_processing_time_seconds) });
|
|
840
|
+
}
|
|
841
|
+
if (timing.average_per_question_seconds) {
|
|
842
|
+
metrics.push({ label: 'Avg/Q', value: this.formatTime(timing.average_per_question_seconds) });
|
|
843
|
+
} else if (timing.average_per_summary_seconds) {
|
|
844
|
+
metrics.push({ label: 'Avg/Summary', value: this.formatTime(timing.average_per_summary_seconds) });
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
// Add report generation time if available
|
|
849
|
+
if (evalData.metadata?.report_generation_time_seconds) {
|
|
850
|
+
metrics.push({ label: 'Report Gen', value: this.formatTime(evalData.metadata.report_generation_time_seconds) });
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
|
|
854
|
+
if (metrics.length === 0) {
|
|
855
|
+
return '<p>No metrics available</p>';
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
return `
|
|
859
|
+
<div class="metrics-grid">
|
|
860
|
+
${metrics.map(metric => `
|
|
861
|
+
<div class="metric-card">
|
|
862
|
+
<div class="metric-value">${metric.value}</div>
|
|
863
|
+
<div class="metric-label">${metric.label}</div>
|
|
864
|
+
</div>
|
|
865
|
+
`).join('')}
|
|
866
|
+
</div>
|
|
867
|
+
`;
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
formatQualityScore(score) {
|
|
871
|
+
// Handle both old (1-4 scale) and new (0-100 percentage) formats
|
|
872
|
+
let percentage;
|
|
873
|
+
if (score <= 4) {
|
|
874
|
+
// Old format: convert from 1-4 scale to percentage
|
|
875
|
+
percentage = ((score - 1) / 3) * 100;
|
|
876
|
+
} else {
|
|
877
|
+
// New format: already a percentage
|
|
878
|
+
percentage = score;
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
// Add qualitative label based on percentage ranges
|
|
882
|
+
let label, cssClass;
|
|
883
|
+
if (percentage >= 85) {
|
|
884
|
+
label = 'Excellent';
|
|
885
|
+
cssClass = 'quality-excellent';
|
|
886
|
+
} else if (percentage >= 67) {
|
|
887
|
+
label = 'Good';
|
|
888
|
+
cssClass = 'quality-good';
|
|
889
|
+
} else if (percentage >= 34) {
|
|
890
|
+
label = 'Fair';
|
|
891
|
+
cssClass = 'quality-fair';
|
|
892
|
+
} else {
|
|
893
|
+
label = 'Poor';
|
|
894
|
+
cssClass = 'quality-poor';
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
return `${percentage.toFixed(1)}% <span class="${cssClass}">${label}</span>`;
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
formatTime(seconds) {
|
|
901
|
+
// Format time values with appropriate precision
|
|
902
|
+
if (seconds === undefined || seconds === null) {
|
|
903
|
+
return 'N/A';
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
// For very small values, show more precision
|
|
907
|
+
if (seconds < 1) {
|
|
908
|
+
return `${(seconds * 1000).toFixed(0)}ms`;
|
|
909
|
+
} else if (seconds < 10) {
|
|
910
|
+
return `${seconds.toFixed(2)}s`;
|
|
911
|
+
} else if (seconds < 60) {
|
|
912
|
+
return `${seconds.toFixed(1)}s`;
|
|
913
|
+
} else {
|
|
914
|
+
// Convert to minutes:seconds for longer durations
|
|
915
|
+
const minutes = Math.floor(seconds / 60);
|
|
916
|
+
const remainingSeconds = (seconds % 60).toFixed(0);
|
|
917
|
+
return `${minutes}m ${remainingSeconds}s`;
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
generateCostBreakdownSection(report) {
|
|
922
|
+
if (!report.experiment) return '';
|
|
923
|
+
|
|
924
|
+
const exp = report.experiment;
|
|
925
|
+
const isLocal = exp.metadata.inference_type === 'local' || exp.metadata.llm_type?.toLowerCase() === 'lemonade';
|
|
926
|
+
const totalCost = exp.metadata.total_cost?.total_cost || 0;
|
|
927
|
+
const totalItems = exp.metadata.total_items || 1;
|
|
928
|
+
const costPerItem = totalCost / totalItems;
|
|
929
|
+
|
|
930
|
+
// Don't show cost breakdown for local inference since it's free
|
|
931
|
+
if (isLocal) {
|
|
932
|
+
return `
|
|
933
|
+
<div class="cost-breakdown-section">
|
|
934
|
+
<div class="cost-banner-free">
|
|
935
|
+
<div class="cost-banner-icon">🎉</div>
|
|
936
|
+
<div class="cost-banner-text">
|
|
937
|
+
<div class="cost-banner-title">Local Inference - No Cost!</div>
|
|
938
|
+
<div class="cost-banner-subtitle">Running on your hardware with Lemonade</div>
|
|
939
|
+
</div>
|
|
940
|
+
</div>
|
|
941
|
+
</div>
|
|
942
|
+
`;
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
return `
|
|
946
|
+
<div class="cost-breakdown-section">
|
|
947
|
+
<h4>💰 Cost Breakdown</h4>
|
|
948
|
+
<div class="cost-grid">
|
|
949
|
+
<div class="cost-card">
|
|
950
|
+
<div class="cost-value">$${totalCost.toFixed(4)}</div>
|
|
951
|
+
<div class="cost-label">Total Cost</div>
|
|
952
|
+
</div>
|
|
953
|
+
<div class="cost-card">
|
|
954
|
+
<div class="cost-value">$${costPerItem.toFixed(5)}</div>
|
|
955
|
+
<div class="cost-label">Per Item</div>
|
|
956
|
+
</div>
|
|
957
|
+
<div class="cost-card">
|
|
958
|
+
<div class="cost-value">$${(exp.metadata.total_cost?.input_cost || 0).toFixed(4)}</div>
|
|
959
|
+
<div class="cost-label">Input Tokens</div>
|
|
960
|
+
</div>
|
|
961
|
+
<div class="cost-card">
|
|
962
|
+
<div class="cost-value">$${(exp.metadata.total_cost?.output_cost || 0).toFixed(4)}</div>
|
|
963
|
+
<div class="cost-label">Output Tokens</div>
|
|
964
|
+
</div>
|
|
965
|
+
</div>
|
|
966
|
+
</div>
|
|
967
|
+
`;
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
generateTimingSection(report) {
|
|
971
|
+
const timingData = [];
|
|
972
|
+
let hasTimingData = false;
|
|
973
|
+
|
|
974
|
+
// Collect experiment timing data
|
|
975
|
+
if (report.experiment?.metadata?.timing) {
|
|
976
|
+
const timing = report.experiment.metadata.timing;
|
|
977
|
+
hasTimingData = true;
|
|
978
|
+
|
|
979
|
+
if (timing.total_experiment_time_seconds) {
|
|
980
|
+
timingData.push({
|
|
981
|
+
label: 'Experiment Execution',
|
|
982
|
+
total: timing.total_experiment_time_seconds,
|
|
983
|
+
average: timing.average_per_item_seconds,
|
|
984
|
+
min: timing.min_per_item_seconds,
|
|
985
|
+
max: timing.max_per_item_seconds,
|
|
986
|
+
count: timing.per_item_times_seconds?.length || 0,
|
|
987
|
+
type: 'items'
|
|
988
|
+
});
|
|
989
|
+
}
|
|
990
|
+
}
|
|
991
|
+
|
|
992
|
+
// Collect evaluation timing data
|
|
993
|
+
if (report.evaluation?.timing) {
|
|
994
|
+
const timing = report.evaluation.timing;
|
|
995
|
+
hasTimingData = true;
|
|
996
|
+
|
|
997
|
+
const type = timing.per_question_times_seconds ? 'questions' : 'summaries';
|
|
998
|
+
const avgKey = type === 'questions' ? 'average_per_question_seconds' : 'average_per_summary_seconds';
|
|
999
|
+
const minKey = type === 'questions' ? 'min_per_question_seconds' : 'min_per_summary_seconds';
|
|
1000
|
+
const maxKey = type === 'questions' ? 'max_per_question_seconds' : 'max_per_summary_seconds';
|
|
1001
|
+
const itemsKey = type === 'questions' ? 'per_question_times_seconds' : 'per_summary_times_seconds';
|
|
1002
|
+
|
|
1003
|
+
timingData.push({
|
|
1004
|
+
label: 'Evaluation Analysis',
|
|
1005
|
+
total: timing.total_processing_time_seconds,
|
|
1006
|
+
average: timing[avgKey],
|
|
1007
|
+
min: timing[minKey],
|
|
1008
|
+
max: timing[maxKey],
|
|
1009
|
+
count: timing[itemsKey]?.length || 0,
|
|
1010
|
+
type: type
|
|
1011
|
+
});
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
// Add report generation time if available
|
|
1015
|
+
if (report.evaluation?.metadata?.report_generation_time_seconds) {
|
|
1016
|
+
hasTimingData = true;
|
|
1017
|
+
timingData.push({
|
|
1018
|
+
label: 'Report Generation',
|
|
1019
|
+
total: report.evaluation.metadata.report_generation_time_seconds,
|
|
1020
|
+
type: 'single'
|
|
1021
|
+
});
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
if (!hasTimingData) {
|
|
1025
|
+
return '';
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
return `
|
|
1029
|
+
<div class="collapsible-section">
|
|
1030
|
+
<div class="collapsible-header">
|
|
1031
|
+
<h4>⏱️ Performance Timing</h4>
|
|
1032
|
+
<span class="collapsible-toggle">▶</span>
|
|
1033
|
+
</div>
|
|
1034
|
+
<div class="collapsible-content">
|
|
1035
|
+
<div class="collapsible-body">
|
|
1036
|
+
<div class="timing-grid">
|
|
1037
|
+
${timingData.map(item => `
|
|
1038
|
+
<div class="timing-card">
|
|
1039
|
+
<div class="timing-header">${item.label}</div>
|
|
1040
|
+
<div class="timing-metrics">
|
|
1041
|
+
<div class="timing-stat">
|
|
1042
|
+
<span class="timing-value">${this.formatTime(item.total)}</span>
|
|
1043
|
+
<span class="timing-label">Total</span>
|
|
1044
|
+
</div>
|
|
1045
|
+
${item.type !== 'single' ? `
|
|
1046
|
+
<div class="timing-stat">
|
|
1047
|
+
<span class="timing-value">${this.formatTime(item.average)}</span>
|
|
1048
|
+
<span class="timing-label">Average</span>
|
|
1049
|
+
</div>
|
|
1050
|
+
<div class="timing-stat">
|
|
1051
|
+
<span class="timing-value">${this.formatTime(item.min)}</span>
|
|
1052
|
+
<span class="timing-label">Min</span>
|
|
1053
|
+
</div>
|
|
1054
|
+
<div class="timing-stat">
|
|
1055
|
+
<span class="timing-value">${this.formatTime(item.max)}</span>
|
|
1056
|
+
<span class="timing-label">Max</span>
|
|
1057
|
+
</div>
|
|
1058
|
+
<div class="timing-stat">
|
|
1059
|
+
<span class="timing-value">${item.count}</span>
|
|
1060
|
+
<span class="timing-label">${item.type.charAt(0).toUpperCase() + item.type.slice(1)}</span>
|
|
1061
|
+
</div>
|
|
1062
|
+
` : ''}
|
|
1063
|
+
</div>
|
|
1064
|
+
</div>
|
|
1065
|
+
`).join('')}
|
|
1066
|
+
</div>
|
|
1067
|
+
</div>
|
|
1068
|
+
</div>
|
|
1069
|
+
</div>
|
|
1070
|
+
`;
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
generateEvaluationSummary(evaluation) {
|
|
1074
|
+
if (!evaluation) return '';
|
|
1075
|
+
|
|
1076
|
+
const hasOverallAnalysis = evaluation.overall_analysis;
|
|
1077
|
+
const hasStrengths = evaluation.strengths && evaluation.strengths.length > 0;
|
|
1078
|
+
const hasWeaknesses = evaluation.weaknesses && evaluation.weaknesses.length > 0;
|
|
1079
|
+
const hasRecommendations = evaluation.recommendations && evaluation.recommendations.length > 0;
|
|
1080
|
+
const hasUseCaseFit = evaluation.use_case_fit;
|
|
1081
|
+
|
|
1082
|
+
if (!hasOverallAnalysis && !hasStrengths && !hasWeaknesses && !hasRecommendations && !hasUseCaseFit) {
|
|
1083
|
+
return '';
|
|
1084
|
+
}
|
|
1085
|
+
|
|
1086
|
+
return `
|
|
1087
|
+
<div class="evaluation-summary">
|
|
1088
|
+
<h4>Evaluation Summary</h4>
|
|
1089
|
+
${hasOverallAnalysis ? `
|
|
1090
|
+
<div class="summary-item">
|
|
1091
|
+
<div class="summary-label">Overall Analysis</div>
|
|
1092
|
+
<div class="summary-text">${this.escapeHtml(evaluation.overall_analysis)}</div>
|
|
1093
|
+
</div>
|
|
1094
|
+
` : ''}
|
|
1095
|
+
${hasStrengths ? `
|
|
1096
|
+
<div class="summary-item">
|
|
1097
|
+
<div class="summary-label">Strengths</div>
|
|
1098
|
+
<ul class="summary-list">
|
|
1099
|
+
${evaluation.strengths.map(strength => `<li>${this.escapeHtml(strength)}</li>`).join('')}
|
|
1100
|
+
</ul>
|
|
1101
|
+
</div>
|
|
1102
|
+
` : ''}
|
|
1103
|
+
${hasWeaknesses ? `
|
|
1104
|
+
<div class="summary-item">
|
|
1105
|
+
<div class="summary-label">Weaknesses</div>
|
|
1106
|
+
<ul class="summary-list">
|
|
1107
|
+
${evaluation.weaknesses.map(weakness => `<li>${this.escapeHtml(weakness)}</li>`).join('')}
|
|
1108
|
+
</ul>
|
|
1109
|
+
</div>
|
|
1110
|
+
` : ''}
|
|
1111
|
+
${hasRecommendations ? `
|
|
1112
|
+
<div class="summary-item">
|
|
1113
|
+
<div class="summary-label">Recommendations</div>
|
|
1114
|
+
<ul class="summary-list">
|
|
1115
|
+
${evaluation.recommendations.map(rec => `<li>${this.escapeHtml(rec)}</li>`).join('')}
|
|
1116
|
+
</ul>
|
|
1117
|
+
</div>
|
|
1118
|
+
` : ''}
|
|
1119
|
+
${hasUseCaseFit ? `
|
|
1120
|
+
<div class="summary-item">
|
|
1121
|
+
<div class="summary-label">Use Case Fit</div>
|
|
1122
|
+
<div class="summary-text">${this.escapeHtml(evaluation.use_case_fit)}</div>
|
|
1123
|
+
</div>
|
|
1124
|
+
` : ''}
|
|
1125
|
+
</div>
|
|
1126
|
+
`;
|
|
1127
|
+
}
|
|
1128
|
+
|
|
1129
|
+
generateQualitySection(evaluation) {
|
|
1130
|
+
if (!evaluation.per_question || evaluation.per_question.length === 0) {
|
|
1131
|
+
return '';
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
// Show overall quality score first
|
|
1135
|
+
let overallSection = '';
|
|
1136
|
+
if (evaluation.overall_rating) {
|
|
1137
|
+
const rating = evaluation.overall_rating;
|
|
1138
|
+
const metrics = rating.metrics || {};
|
|
1139
|
+
|
|
1140
|
+
overallSection = `
|
|
1141
|
+
<div class="quality-overview">
|
|
1142
|
+
<h4>📊 Overall Quality Assessment <span class="info-icon" data-tooltip="Summary of evaluation results across all test cases">?</span></h4>
|
|
1143
|
+
<div class="quality-score-card">
|
|
1144
|
+
<div class="quality-score-main">
|
|
1145
|
+
<span class="quality-score-value" data-tooltip="Weighted average score: (Excellent×4 + Good×3 + Fair×2 + Poor×1) normalized to 0-100%">${metrics.quality_score ? Math.round(metrics.quality_score) : 'N/A'}%</span>
|
|
1146
|
+
<span class="quality-score-rating rating-${rating.rating}" data-tooltip="Overall rating: Excellent (≥70% excellent), Good (≥70% good+), Fair (≥70% fair+), or Poor">${rating.rating.toUpperCase()}</span>
|
|
1147
|
+
</div>
|
|
1148
|
+
<div class="quality-distribution">
|
|
1149
|
+
<div class="quality-counts">
|
|
1150
|
+
<span class="count-item excellent" data-tooltip="Summaries with excellent quality: comprehensive, accurate, and well-structured">Excellent: ${metrics.excellent_count || 0}</span>
|
|
1151
|
+
<span class="count-item good" data-tooltip="Summaries with good quality: mostly accurate with minor issues">Good: ${metrics.good_count || 0}</span>
|
|
1152
|
+
<span class="count-item fair" data-tooltip="Summaries with fair quality: acceptable but missing key details">Fair: ${metrics.fair_count || 0}</span>
|
|
1153
|
+
<span class="count-item poor" data-tooltip="Summaries with poor quality: significant errors or omissions">Poor: ${metrics.poor_count || 0}</span>
|
|
1154
|
+
</div>
|
|
1155
|
+
<div class="quality-explanation">${this.escapeHtml(rating.explanation || '')}</div>
|
|
1156
|
+
</div>
|
|
1157
|
+
</div>
|
|
1158
|
+
</div>
|
|
1159
|
+
`;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
// Show detailed analysis for each item
|
|
1163
|
+
let detailsSection = '';
|
|
1164
|
+
evaluation.per_question.forEach((item, index) => {
|
|
1165
|
+
const analysis = item.analysis;
|
|
1166
|
+
if (!analysis) return;
|
|
1167
|
+
|
|
1168
|
+
const sourceFile = item.source_file ? item.source_file.split('\\').pop().split('/').pop() : `Item ${index + 1}`;
|
|
1169
|
+
const fullSourcePath = item.source_file || null;
|
|
1170
|
+
|
|
1171
|
+
// Support both old and new field names from evaluation structure
|
|
1172
|
+
// New format (from Claude evaluations): uses "accuracy" terminology
|
|
1173
|
+
// Old format (legacy): uses "quality" terminology
|
|
1174
|
+
// This ensures backward compatibility while supporting the latest evaluation format
|
|
1175
|
+
const qualityItems = [
|
|
1176
|
+
// New field names (accuracy-based) - Current evaluation format
|
|
1177
|
+
{ key: 'executive_summary_accuracy', label: 'Executive Summary Accuracy', data: analysis.executive_summary_accuracy },
|
|
1178
|
+
{ key: 'completeness', label: 'Completeness', data: analysis.completeness },
|
|
1179
|
+
{ key: 'action_items_accuracy', label: 'Action Items Accuracy', data: analysis.action_items_accuracy },
|
|
1180
|
+
{ key: 'key_decisions_accuracy', label: 'Key Decisions Accuracy', data: analysis.key_decisions_accuracy },
|
|
1181
|
+
{ key: 'participant_identification', label: 'Participant Identification', data: analysis.participant_identification },
|
|
1182
|
+
{ key: 'topic_coverage', label: 'Topic Coverage', data: analysis.topic_coverage },
|
|
1183
|
+
// Old field names (quality-based) for backward compatibility
|
|
1184
|
+
{ key: 'executive_summary_quality', label: 'Executive Summary Quality', data: analysis.executive_summary_quality },
|
|
1185
|
+
{ key: 'detail_completeness', label: 'Detail Completeness', data: analysis.detail_completeness },
|
|
1186
|
+
{ key: 'action_items_structure', label: 'Action Items Structure', data: analysis.action_items_structure },
|
|
1187
|
+
{ key: 'key_decisions_clarity', label: 'Key Decisions Clarity', data: analysis.key_decisions_clarity },
|
|
1188
|
+
{ key: 'participant_information', label: 'Participant Information', data: analysis.participant_information },
|
|
1189
|
+
{ key: 'topic_organization', label: 'Topic Organization', data: analysis.topic_organization }
|
|
1190
|
+
].filter(item => item.data && item.data.rating);
|
|
1191
|
+
|
|
1192
|
+
if (qualityItems.length > 0) {
|
|
1193
|
+
detailsSection += `
|
|
1194
|
+
<div class="quality-details">
|
|
1195
|
+
<div class="collapsible-section" data-section="quality-${index}">
|
|
1196
|
+
<div class="collapsible-header">
|
|
1197
|
+
<h5>🎯 Detailed Analysis - ${sourceFile}</h5>
|
|
1198
|
+
${fullSourcePath ? `<button class="view-source-btn" data-source-path="${fullSourcePath}" title="View source file: ${fullSourcePath}">📄 View Source</button>` : ''}
|
|
1199
|
+
<span class="collapsible-toggle">▶</span>
|
|
1200
|
+
</div>
|
|
1201
|
+
<div class="collapsible-content">
|
|
1202
|
+
<div class="collapsible-body">
|
|
1203
|
+
<div class="quality-grid">
|
|
1204
|
+
${qualityItems.map(item => `
|
|
1205
|
+
<div class="quality-detail-card expanded">
|
|
1206
|
+
<div class="quality-detail-header">
|
|
1207
|
+
<span class="quality-detail-label">${item.label}</span>
|
|
1208
|
+
<span class="quality-rating rating-${item.data.rating}">${item.data.rating}</span>
|
|
1209
|
+
</div>
|
|
1210
|
+
<div class="quality-detail-explanation full">
|
|
1211
|
+
${this.escapeHtml(item.data.explanation || 'No explanation provided')}
|
|
1212
|
+
</div>
|
|
1213
|
+
</div>
|
|
1214
|
+
`).join('')}
|
|
1215
|
+
</div>
|
|
1216
|
+
<div class="overall-item-rating">
|
|
1217
|
+
Overall Item Quality: <span class="quality-rating rating-${item.overall_quality || analysis.overall_quality}">${(item.overall_quality || analysis.overall_quality || 'N/A').toUpperCase()}</span>
|
|
1218
|
+
</div>
|
|
1219
|
+
</div>
|
|
1220
|
+
</div>
|
|
1221
|
+
</div>
|
|
1222
|
+
</div>
|
|
1223
|
+
`;
|
|
1224
|
+
}
|
|
1225
|
+
});
|
|
1226
|
+
|
|
1227
|
+
return `
|
|
1228
|
+
<div class="quality-section">
|
|
1229
|
+
${overallSection}
|
|
1230
|
+
${detailsSection}
|
|
1231
|
+
</div>
|
|
1232
|
+
`;
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
generateExperimentDetails(experiment) {
|
|
1236
|
+
const metadata = experiment.metadata;
|
|
1237
|
+
const isLocal = metadata.inference_type === 'local' || metadata.llm_type?.toLowerCase() === 'lemonade';
|
|
1238
|
+
|
|
1239
|
+
return `
|
|
1240
|
+
<div class="experiment-details">
|
|
1241
|
+
<h4>Experiment Details</h4>
|
|
1242
|
+
<div class="detail-grid">
|
|
1243
|
+
<div><strong>Tested Model:</strong> ${metadata.tested_model || metadata.model || 'N/A'}</div>
|
|
1244
|
+
<div><strong>Inference Type:</strong> ${isLocal ?
|
|
1245
|
+
'<span style="color: #28a745;">🖥️ Local (Free)</span>' :
|
|
1246
|
+
'<span style="color: #007bff;">☁️ Cloud (Paid)</span>'}</div>
|
|
1247
|
+
<div><strong>Temperature:</strong> ${metadata.temperature || 'N/A'}</div>
|
|
1248
|
+
<div><strong>Max Tokens:</strong> ${metadata.max_tokens || 'N/A'}</div>
|
|
1249
|
+
<div><strong>Date:</strong> ${metadata.timestamp || 'N/A'}</div>
|
|
1250
|
+
${metadata.errors && metadata.errors.length > 0 ?
|
|
1251
|
+
`<div style="color: #dc3545;"><strong>Errors:</strong> ${metadata.errors.length}</div>` :
|
|
1252
|
+
''}
|
|
1253
|
+
</div>
|
|
1254
|
+
</div>
|
|
1255
|
+
`;
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
generateExperimentSummaries(experiment) {
|
|
1259
|
+
if (!experiment.analysis) {
|
|
1260
|
+
return '';
|
|
1261
|
+
}
|
|
1262
|
+
|
|
1263
|
+
let contentHtml = '';
|
|
1264
|
+
|
|
1265
|
+
// Handle Q&A results
|
|
1266
|
+
if (experiment.analysis.qa_results) {
|
|
1267
|
+
const qaResults = experiment.analysis.qa_results;
|
|
1268
|
+
if (qaResults.length > 0) {
|
|
1269
|
+
contentHtml += `
|
|
1270
|
+
<div class="collapsible-section" data-section="qa-results">
|
|
1271
|
+
<div class="collapsible-header">
|
|
1272
|
+
<h4>Q&A Results</h4>
|
|
1273
|
+
<span class="collapsible-toggle">▶</span>
|
|
1274
|
+
</div>
|
|
1275
|
+
<div class="collapsible-content">
|
|
1276
|
+
<div class="collapsible-body">
|
|
1277
|
+
${qaResults.map((qa, index) => `
|
|
1278
|
+
<div class="qa-item" style="margin-bottom: 20px; padding: 15px; border: 1px solid #e0e0e0; border-radius: 5px;">
|
|
1279
|
+
<div style="margin-bottom: 10px;">
|
|
1280
|
+
<strong>Question ${index + 1}:</strong>
|
|
1281
|
+
<div style="color: #333; margin-top: 5px;">${this.escapeHtml(qa.query)}</div>
|
|
1282
|
+
</div>
|
|
1283
|
+
<div style="margin-bottom: 10px;">
|
|
1284
|
+
<strong>Model Response:</strong>
|
|
1285
|
+
<div style="color: #444; margin-top: 5px; white-space: pre-wrap;">${this.escapeHtml(qa.response)}</div>
|
|
1286
|
+
</div>
|
|
1287
|
+
<div style="margin-bottom: 10px;">
|
|
1288
|
+
<strong>Ground Truth:</strong>
|
|
1289
|
+
<div style="color: #666; margin-top: 5px;">${this.escapeHtml(qa.ground_truth)}</div>
|
|
1290
|
+
</div>
|
|
1291
|
+
${qa.processing_time_seconds ? `
|
|
1292
|
+
<div style="color: #888; font-size: 0.9em;">
|
|
1293
|
+
<strong>Processing Time:</strong> ${qa.processing_time_seconds.toFixed(2)}s
|
|
1294
|
+
</div>
|
|
1295
|
+
` : ''}
|
|
1296
|
+
</div>
|
|
1297
|
+
`).join('')}
|
|
1298
|
+
</div>
|
|
1299
|
+
</div>
|
|
1300
|
+
</div>
|
|
1301
|
+
`;
|
|
1302
|
+
}
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
// Handle summarization results
|
|
1306
|
+
if (experiment.analysis.summarization_results) {
|
|
1307
|
+
const results = experiment.analysis.summarization_results;
|
|
1308
|
+
if (results.length > 0) {
|
|
1309
|
+
// Generate content for all summarization results
|
|
1310
|
+
results.forEach((result, index) => {
|
|
1311
|
+
if (result.generated_summaries) {
|
|
1312
|
+
const summaries = result.generated_summaries;
|
|
1313
|
+
const sourceFile = result.source_file ? result.source_file.split('\\').pop().split('/').pop() : `Item ${index + 1}`;
|
|
1314
|
+
|
|
1315
|
+
contentHtml += `
|
|
1316
|
+
<div class="collapsible-section" data-section="summaries-${index}">
|
|
1317
|
+
<div class="collapsible-header">
|
|
1318
|
+
<h4>Generated Summaries - ${sourceFile}</h4>
|
|
1319
|
+
<span class="collapsible-toggle">▶</span>
|
|
1320
|
+
</div>
|
|
1321
|
+
<div class="collapsible-content">
|
|
1322
|
+
<div class="collapsible-body">
|
|
1323
|
+
${Object.entries(summaries).map(([key, value]) => `
|
|
1324
|
+
<div class="summary-item">
|
|
1325
|
+
<div class="summary-label">${key.replace(/_/g, ' ').toUpperCase()}</div>
|
|
1326
|
+
<div class="summary-text">${this.escapeHtml(value)}</div>
|
|
1327
|
+
</div>
|
|
1328
|
+
`).join('')}
|
|
1329
|
+
</div>
|
|
1330
|
+
</div>
|
|
1331
|
+
</div>
|
|
1332
|
+
`;
|
|
1333
|
+
}
|
|
1334
|
+
});
|
|
1335
|
+
}
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1338
|
+
return contentHtml;
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
generateEvaluationExplanations(evaluation) {
|
|
1342
|
+
if (!evaluation.per_question || evaluation.per_question.length === 0) {
|
|
1343
|
+
return '';
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
let explanationsHtml = '';
|
|
1347
|
+
evaluation.per_question.forEach((item, index) => {
|
|
1348
|
+
if (item.analysis) {
|
|
1349
|
+
const analysis = item.analysis;
|
|
1350
|
+
const sourceFile = item.source_file ? item.source_file.split('\\').pop().split('/').pop() : `Item ${index + 1}`;
|
|
1351
|
+
|
|
1352
|
+
// Use correct field names from actual evaluation structure
|
|
1353
|
+
const explanationItems = [
|
|
1354
|
+
{ key: 'executive_summary_quality', label: 'Executive Summary Quality' },
|
|
1355
|
+
{ key: 'detail_completeness', label: 'Detail Completeness' },
|
|
1356
|
+
{ key: 'action_items_structure', label: 'Action Items Structure' },
|
|
1357
|
+
{ key: 'key_decisions_clarity', label: 'Key Decisions Clarity' },
|
|
1358
|
+
{ key: 'participant_information', label: 'Participant Information' },
|
|
1359
|
+
{ key: 'topic_organization', label: 'Topic Organization' }
|
|
1360
|
+
].filter(item => analysis[item.key] && analysis[item.key].explanation);
|
|
1361
|
+
|
|
1362
|
+
if (explanationItems.length > 0) {
|
|
1363
|
+
explanationsHtml += `
|
|
1364
|
+
<div class="collapsible-section" data-section="explanations-${index}">
|
|
1365
|
+
<div class="collapsible-header">
|
|
1366
|
+
<h4>📝 Detailed Quality Explanations - ${sourceFile}</h4>
|
|
1367
|
+
<span class="collapsible-toggle">▶</span>
|
|
1368
|
+
</div>
|
|
1369
|
+
<div class="collapsible-content">
|
|
1370
|
+
<div class="collapsible-body">
|
|
1371
|
+
${explanationItems.map(item => {
|
|
1372
|
+
const data = analysis[item.key];
|
|
1373
|
+
return `
|
|
1374
|
+
<div class="explanation-item">
|
|
1375
|
+
<div class="explanation-label">${item.label}</div>
|
|
1376
|
+
<div class="explanation-rating">
|
|
1377
|
+
<span class="quality-rating rating-${data.rating}">${data.rating}</span>
|
|
1378
|
+
</div>
|
|
1379
|
+
<div class="explanation-text">${this.escapeHtml(data.explanation)}</div>
|
|
1380
|
+
</div>
|
|
1381
|
+
`;
|
|
1382
|
+
}).join('')}
|
|
1383
|
+
</div>
|
|
1384
|
+
</div>
|
|
1385
|
+
</div>
|
|
1386
|
+
`;
|
|
1387
|
+
}
|
|
1388
|
+
}
|
|
1389
|
+
});
|
|
1390
|
+
|
|
1391
|
+
return explanationsHtml;
|
|
1392
|
+
}
|
|
1393
|
+
|
|
1394
|
+
generateTestDataSection(testData) {
|
|
1395
|
+
const { content, metadata, type, filename, isPdf, message } = testData;
|
|
1396
|
+
|
|
1397
|
+
let metadataInfo = '';
|
|
1398
|
+
if (metadata) {
|
|
1399
|
+
const info = metadata.generation_info || {};
|
|
1400
|
+
const fileInfo = metadata[type === 'emails' ? 'emails' : 'transcripts']?.find(
|
|
1401
|
+
item => item.filename === filename
|
|
1402
|
+
);
|
|
1403
|
+
|
|
1404
|
+
metadataInfo = `
|
|
1405
|
+
<div class="metrics-grid">
|
|
1406
|
+
<div class="metric-card">
|
|
1407
|
+
<div class="metric-value">${type}</div>
|
|
1408
|
+
<div class="metric-label">Type</div>
|
|
1409
|
+
</div>
|
|
1410
|
+
<div class="metric-card">
|
|
1411
|
+
<div class="metric-value">${fileInfo?.estimated_tokens || 'N/A'}</div>
|
|
1412
|
+
<div class="metric-label">Est. Tokens</div>
|
|
1413
|
+
</div>
|
|
1414
|
+
<div class="metric-card">
|
|
1415
|
+
<div class="metric-value">$${fileInfo?.claude_cost?.total_cost?.toFixed(4) || 'N/A'}</div>
|
|
1416
|
+
<div class="metric-label">Generation Cost</div>
|
|
1417
|
+
</div>
|
|
1418
|
+
<div class="metric-card">
|
|
1419
|
+
<div class="metric-value">${info.claude_model || 'N/A'}</div>
|
|
1420
|
+
<div class="metric-label">Model</div>
|
|
1421
|
+
</div>
|
|
1422
|
+
</div>
|
|
1423
|
+
`;
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
return `
|
|
1427
|
+
${metadataInfo}
|
|
1428
|
+
<div class="collapsible-section">
|
|
1429
|
+
<div class="collapsible-header">
|
|
1430
|
+
<h4>Content</h4>
|
|
1431
|
+
<span class="collapsible-toggle">▶</span>
|
|
1432
|
+
</div>
|
|
1433
|
+
<div class="collapsible-content">
|
|
1434
|
+
<div class="collapsible-body">
|
|
1435
|
+
<div class="summary-item">
|
|
1436
|
+
<div class="summary-label">${filename}</div>
|
|
1437
|
+
<div class="summary-text">
|
|
1438
|
+
${isPdf ?
|
|
1439
|
+
`<div style="padding: 20px; background: #f0f0f0; border-radius: 5px; text-align: center;">
|
|
1440
|
+
<span style="font-size: 48px;">📄</span>
|
|
1441
|
+
<p style="margin-top: 10px; color: #666;">${message || 'PDF file - preview not available'}</p>
|
|
1442
|
+
<p style="color: #888; font-size: 0.9em;">Size: ${testData.size ? (testData.size / 1024).toFixed(2) + ' KB' : 'Unknown'}</p>
|
|
1443
|
+
</div>` :
|
|
1444
|
+
this.escapeHtml(content || '')
|
|
1445
|
+
}
|
|
1446
|
+
</div>
|
|
1447
|
+
</div>
|
|
1448
|
+
</div>
|
|
1449
|
+
</div>
|
|
1450
|
+
</div>
|
|
1451
|
+
${metadata ? this.generateTestDataMetadataSection(metadata, type) : ''}
|
|
1452
|
+
`;
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1455
|
+
generateTestDataMetadataSection(metadata, type) {
|
|
1456
|
+
const info = metadata.generation_info || {};
|
|
1457
|
+
const items = metadata[type === 'emails' ? 'emails' : 'transcripts'] || [];
|
|
1458
|
+
|
|
1459
|
+
return `
|
|
1460
|
+
<div class="collapsible-section">
|
|
1461
|
+
<div class="collapsible-header">
|
|
1462
|
+
<h4>Generation Metadata</h4>
|
|
1463
|
+
<span class="collapsible-toggle">▶</span>
|
|
1464
|
+
</div>
|
|
1465
|
+
<div class="collapsible-content">
|
|
1466
|
+
<div class="collapsible-body">
|
|
1467
|
+
<div class="detail-grid">
|
|
1468
|
+
<div><strong>Generated:</strong> ${new Date(info.generated_date).toLocaleString()}</div>
|
|
1469
|
+
<div><strong>Total Files:</strong> ${info.total_files}</div>
|
|
1470
|
+
<div><strong>Target Tokens:</strong> ${info.target_tokens_per_file}</div>
|
|
1471
|
+
<div><strong>Total Cost:</strong> $${info.total_claude_cost?.total_cost?.toFixed(4) || 'N/A'}</div>
|
|
1472
|
+
<div><strong>Total Tokens:</strong> ${info.total_claude_usage?.total_tokens || 'N/A'}</div>
|
|
1473
|
+
<div><strong>Model:</strong> ${info.claude_model}</div>
|
|
1474
|
+
</div>
|
|
1475
|
+
${items.length > 0 ? `
|
|
1476
|
+
<h5 style="margin-top: 15px;">All ${type === 'emails' ? 'Emails' : 'Transcripts'}</h5>
|
|
1477
|
+
<div class="detail-grid">
|
|
1478
|
+
${items.map(item => `
|
|
1479
|
+
<div style="grid-column: 1 / -1; margin-bottom: 10px; padding: 10px; background: #f8f9fa; border-radius: 5px;">
|
|
1480
|
+
<strong>${item.filename}</strong><br>
|
|
1481
|
+
<small>${item.description}</small><br>
|
|
1482
|
+
<small>Tokens: ${item.estimated_tokens}, Cost: $${item.claude_cost?.total_cost?.toFixed(4) || 'N/A'}</small>
|
|
1483
|
+
</div>
|
|
1484
|
+
`).join('')}
|
|
1485
|
+
</div>
|
|
1486
|
+
` : ''}
|
|
1487
|
+
</div>
|
|
1488
|
+
</div>
|
|
1489
|
+
</div>
|
|
1490
|
+
`;
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
generateGroundtruthSection(groundtruth) {
|
|
1494
|
+
const { metadata, analysis } = groundtruth;
|
|
1495
|
+
|
|
1496
|
+
let metadataInfo = '';
|
|
1497
|
+
if (metadata) {
|
|
1498
|
+
metadataInfo = `
|
|
1499
|
+
<div class="metrics-grid">
|
|
1500
|
+
<div class="metric-card">
|
|
1501
|
+
<div class="metric-value">${metadata.use_case || 'N/A'}</div>
|
|
1502
|
+
<div class="metric-label">Use Case</div>
|
|
1503
|
+
</div>
|
|
1504
|
+
<div class="metric-card">
|
|
1505
|
+
<div class="metric-value">${metadata.inference_usage?.total_tokens || 'N/A'}</div>
|
|
1506
|
+
<div class="metric-label">Generation Tokens</div>
|
|
1507
|
+
</div>
|
|
1508
|
+
<div class="metric-card">
|
|
1509
|
+
<div class="metric-value">$${metadata.cost?.total_cost?.toFixed(4) || 'N/A'}</div>
|
|
1510
|
+
<div class="metric-label">Generation Cost</div>
|
|
1511
|
+
</div>
|
|
1512
|
+
<div class="metric-card">
|
|
1513
|
+
<div class="metric-value">${metadata.tested_model || metadata.model || 'N/A'}</div>
|
|
1514
|
+
<div class="metric-label">Tested Model</div>
|
|
1515
|
+
</div>
|
|
1516
|
+
</div>
|
|
1517
|
+
`;
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
return `
|
|
1521
|
+
${metadataInfo}
|
|
1522
|
+
${analysis?.summaries ? this.generateGroundtruthSummaries(analysis.summaries) : ''}
|
|
1523
|
+
${analysis?.evaluation_criteria ? this.generateGroundtruthCriteria(analysis.evaluation_criteria) : ''}
|
|
1524
|
+
${metadata ? this.generateGroundtruthMetadataSection(metadata, analysis) : ''}
|
|
1525
|
+
`;
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1528
|
+
generateGroundtruthSummaries(summaries) {
|
|
1529
|
+
return `
|
|
1530
|
+
<div class="collapsible-section">
|
|
1531
|
+
<div class="collapsible-header">
|
|
1532
|
+
<h4>Ground Truth Summaries</h4>
|
|
1533
|
+
<span class="collapsible-toggle">▶</span>
|
|
1534
|
+
</div>
|
|
1535
|
+
<div class="collapsible-content">
|
|
1536
|
+
<div class="collapsible-body">
|
|
1537
|
+
${Object.entries(summaries).map(([key, value]) => {
|
|
1538
|
+
if (Array.isArray(value)) {
|
|
1539
|
+
return `
|
|
1540
|
+
<div class="summary-item">
|
|
1541
|
+
<div class="summary-label">${key.replace(/_/g, ' ').toUpperCase()}</div>
|
|
1542
|
+
<div class="summary-text">${value.map(item => `• ${this.escapeHtml(item)}`).join('\n')}</div>
|
|
1543
|
+
</div>
|
|
1544
|
+
`;
|
|
1545
|
+
} else {
|
|
1546
|
+
return `
|
|
1547
|
+
<div class="summary-item">
|
|
1548
|
+
<div class="summary-label">${key.replace(/_/g, ' ').toUpperCase()}</div>
|
|
1549
|
+
<div class="summary-text">${this.escapeHtml(value)}</div>
|
|
1550
|
+
</div>
|
|
1551
|
+
`;
|
|
1552
|
+
}
|
|
1553
|
+
}).join('')}
|
|
1554
|
+
</div>
|
|
1555
|
+
</div>
|
|
1556
|
+
</div>
|
|
1557
|
+
`;
|
|
1558
|
+
}
|
|
1559
|
+
|
|
1560
|
+
generateGroundtruthCriteria(criteria) {
|
|
1561
|
+
return `
|
|
1562
|
+
<div class="collapsible-section">
|
|
1563
|
+
<div class="collapsible-header">
|
|
1564
|
+
<h4>Evaluation Criteria</h4>
|
|
1565
|
+
<span class="collapsible-toggle">▶</span>
|
|
1566
|
+
</div>
|
|
1567
|
+
<div class="collapsible-content">
|
|
1568
|
+
<div class="collapsible-body">
|
|
1569
|
+
${Object.entries(criteria).map(([key, value]) => `
|
|
1570
|
+
<div class="summary-item">
|
|
1571
|
+
<div class="summary-label">${key.replace(/_/g, ' ').toUpperCase()}</div>
|
|
1572
|
+
<div class="summary-text">${this.escapeHtml(value)}</div>
|
|
1573
|
+
</div>
|
|
1574
|
+
`).join('')}
|
|
1575
|
+
</div>
|
|
1576
|
+
</div>
|
|
1577
|
+
</div>
|
|
1578
|
+
`;
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
generateGroundtruthMetadataSection(metadata, analysis) {
|
|
1582
|
+
return `
|
|
1583
|
+
<div class="collapsible-section">
|
|
1584
|
+
<div class="collapsible-header">
|
|
1585
|
+
<h4>Generation Details</h4>
|
|
1586
|
+
<span class="collapsible-toggle">▶</span>
|
|
1587
|
+
</div>
|
|
1588
|
+
<div class="collapsible-content">
|
|
1589
|
+
<div class="collapsible-body">
|
|
1590
|
+
<div class="detail-grid">
|
|
1591
|
+
<div><strong>Generated:</strong> ${metadata.timestamp}</div>
|
|
1592
|
+
<div><strong>Source File:</strong> ${metadata.source_file}</div>
|
|
1593
|
+
<div><strong>Tested Model:</strong> ${metadata.tested_model || metadata.model || 'N/A'}</div>
|
|
1594
|
+
<div><strong>Evaluator:</strong> ${metadata.evaluator_model || 'N/A'}</div>
|
|
1595
|
+
<div><strong>Use Case:</strong> ${metadata.use_case}</div>
|
|
1596
|
+
<div><strong>Input Tokens:</strong> ${metadata.usage?.input_tokens || 'N/A'}</div>
|
|
1597
|
+
<div><strong>Output Tokens:</strong> ${metadata.usage?.output_tokens || 'N/A'}</div>
|
|
1598
|
+
<div><strong>Input Cost:</strong> $${metadata.cost?.input_cost?.toFixed(4) || 'N/A'}</div>
|
|
1599
|
+
<div><strong>Output Cost:</strong> $${metadata.cost?.output_cost?.toFixed(4) || 'N/A'}</div>
|
|
1600
|
+
</div>
|
|
1601
|
+
${analysis?.transcript_metadata ? `
|
|
1602
|
+
<h5 style="margin-top: 15px;">Content Metadata</h5>
|
|
1603
|
+
<div class="detail-grid">
|
|
1604
|
+
${Object.entries(analysis.transcript_metadata).map(([key, value]) => `
|
|
1605
|
+
<div><strong>${key.replace(/_/g, ' ')}:</strong> ${value}</div>
|
|
1606
|
+
`).join('')}
|
|
1607
|
+
</div>
|
|
1608
|
+
` : ''}
|
|
1609
|
+
</div>
|
|
1610
|
+
</div>
|
|
1611
|
+
</div>
|
|
1612
|
+
`;
|
|
1613
|
+
}
|
|
1614
|
+
|
|
1615
|
+
escapeHtml(text) {
|
|
1616
|
+
const div = document.createElement('div');
|
|
1617
|
+
div.textContent = text;
|
|
1618
|
+
return div.innerHTML;
|
|
1619
|
+
}
|
|
1620
|
+
|
|
1621
|
+
async viewSourceFile(sourcePath) {
|
|
1622
|
+
try {
|
|
1623
|
+
// Convert Windows-style path to API path
|
|
1624
|
+
// Example: "test_data\meetings\all_hands_meeting_1.txt" → "meetings/all_hands_meeting_1.txt"
|
|
1625
|
+
const pathParts = sourcePath.replace(/\\/g, '/').split('/');
|
|
1626
|
+
let apiPath = '';
|
|
1627
|
+
let fileName = '';
|
|
1628
|
+
|
|
1629
|
+
// Find the test_data or meetings part
|
|
1630
|
+
const testDataIndex = pathParts.findIndex(part => part === 'test_data');
|
|
1631
|
+
if (testDataIndex !== -1 && testDataIndex < pathParts.length - 2) {
|
|
1632
|
+
// Format: test_data/meetings/filename.txt → API: /api/test-data/meetings/filename.txt
|
|
1633
|
+
const type = pathParts[testDataIndex + 1]; // e.g., "meetings"
|
|
1634
|
+
fileName = pathParts[testDataIndex + 2]; // e.g., "all_hands_meeting_1.txt"
|
|
1635
|
+
apiPath = `/api/test-data/${type}/${fileName}`;
|
|
1636
|
+
} else {
|
|
1637
|
+
// Fallback: try to extract last two parts (type/filename)
|
|
1638
|
+
if (pathParts.length >= 2) {
|
|
1639
|
+
const type = pathParts[pathParts.length - 2];
|
|
1640
|
+
fileName = pathParts[pathParts.length - 1];
|
|
1641
|
+
apiPath = `/api/test-data/${type}/${fileName}`;
|
|
1642
|
+
} else {
|
|
1643
|
+
throw new Error('Invalid source path format');
|
|
1644
|
+
}
|
|
1645
|
+
}
|
|
1646
|
+
|
|
1647
|
+
// Fetch the source file content
|
|
1648
|
+
const response = await fetch(apiPath);
|
|
1649
|
+
if (!response.ok) {
|
|
1650
|
+
throw new Error(`Failed to load source file: ${response.status}`);
|
|
1651
|
+
}
|
|
1652
|
+
|
|
1653
|
+
const data = await response.json();
|
|
1654
|
+
this.showSourceModal(data.content, fileName, sourcePath);
|
|
1655
|
+
} catch (error) {
|
|
1656
|
+
console.error('Error loading source file:', error);
|
|
1657
|
+
this.showError(`Failed to load source file: ${error.message}`);
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
|
|
1661
|
+
showSourceModal(content, fileName, fullPath) {
|
|
1662
|
+
// Remove any existing modal
|
|
1663
|
+
const existingModal = document.getElementById('sourceModal');
|
|
1664
|
+
if (existingModal) {
|
|
1665
|
+
existingModal.remove();
|
|
1666
|
+
}
|
|
1667
|
+
|
|
1668
|
+
// Create modal HTML
|
|
1669
|
+
const modal = document.createElement('div');
|
|
1670
|
+
modal.id = 'sourceModal';
|
|
1671
|
+
modal.className = 'source-modal';
|
|
1672
|
+
modal.innerHTML = `
|
|
1673
|
+
<div class="source-modal-content">
|
|
1674
|
+
<div class="source-modal-header">
|
|
1675
|
+
<h3>📄 Source File: ${fileName}</h3>
|
|
1676
|
+
<span class="source-modal-path">${fullPath}</span>
|
|
1677
|
+
<button class="source-modal-close">×</button>
|
|
1678
|
+
</div>
|
|
1679
|
+
<div class="source-modal-body">
|
|
1680
|
+
<pre class="source-content">${this.escapeHtml(content)}</pre>
|
|
1681
|
+
</div>
|
|
1682
|
+
<div class="source-modal-footer">
|
|
1683
|
+
<div class="source-stats">
|
|
1684
|
+
${content.split('\n').length} lines | ${content.length} characters
|
|
1685
|
+
</div>
|
|
1686
|
+
<button class="source-modal-copy">📋 Copy to Clipboard</button>
|
|
1687
|
+
</div>
|
|
1688
|
+
</div>
|
|
1689
|
+
`;
|
|
1690
|
+
|
|
1691
|
+
document.body.appendChild(modal);
|
|
1692
|
+
|
|
1693
|
+
// Add event listeners
|
|
1694
|
+
const closeBtn = modal.querySelector('.source-modal-close');
|
|
1695
|
+
const copyBtn = modal.querySelector('.source-modal-copy');
|
|
1696
|
+
|
|
1697
|
+
closeBtn.addEventListener('click', () => {
|
|
1698
|
+
modal.remove();
|
|
1699
|
+
});
|
|
1700
|
+
|
|
1701
|
+
copyBtn.addEventListener('click', () => {
|
|
1702
|
+
navigator.clipboard.writeText(content).then(() => {
|
|
1703
|
+
copyBtn.textContent = '✓ Copied!';
|
|
1704
|
+
setTimeout(() => {
|
|
1705
|
+
copyBtn.textContent = '📋 Copy to Clipboard';
|
|
1706
|
+
}, 2000);
|
|
1707
|
+
}).catch(err => {
|
|
1708
|
+
console.error('Failed to copy:', err);
|
|
1709
|
+
this.showError('Failed to copy to clipboard');
|
|
1710
|
+
});
|
|
1711
|
+
});
|
|
1712
|
+
|
|
1713
|
+
// Close modal when clicking outside
|
|
1714
|
+
modal.addEventListener('click', (e) => {
|
|
1715
|
+
if (e.target === modal) {
|
|
1716
|
+
modal.remove();
|
|
1717
|
+
}
|
|
1718
|
+
});
|
|
1719
|
+
|
|
1720
|
+
// Close modal with Escape key
|
|
1721
|
+
const escapeHandler = (e) => {
|
|
1722
|
+
if (e.key === 'Escape') {
|
|
1723
|
+
modal.remove();
|
|
1724
|
+
document.removeEventListener('keydown', escapeHandler);
|
|
1725
|
+
}
|
|
1726
|
+
};
|
|
1727
|
+
document.addEventListener('keydown', escapeHandler);
|
|
1728
|
+
}
|
|
1729
|
+
|
|
1730
|
+
compareSelected() {
|
|
1731
|
+
// For now, this just scrolls to show all reports
|
|
1732
|
+
// In a more advanced version, this could create a dedicated comparison view
|
|
1733
|
+
const reportsContainer = document.querySelector('.reports-container');
|
|
1734
|
+
reportsContainer.scrollIntoView({ behavior: 'smooth' });
|
|
1735
|
+
|
|
1736
|
+
if (this.loadedReports.size < 2) {
|
|
1737
|
+
alert('Load at least 2 reports to compare');
|
|
1738
|
+
return;
|
|
1739
|
+
}
|
|
1740
|
+
|
|
1741
|
+
// Show success message
|
|
1742
|
+
this.showMessage(`Comparing ${this.loadedReports.size} reports side-by-side`);
|
|
1743
|
+
}
|
|
1744
|
+
|
|
1745
|
+
removeReport(reportId) {
|
|
1746
|
+
this.loadedReports.delete(reportId);
|
|
1747
|
+
this.updateDisplay();
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1750
|
+
clearAllReports() {
|
|
1751
|
+
this.loadedReports.clear();
|
|
1752
|
+
this.updateDisplay();
|
|
1753
|
+
}
|
|
1754
|
+
|
|
1755
|
+
showError(message) {
|
|
1756
|
+
// Simple error display - could be enhanced with better UI
|
|
1757
|
+
console.error(message);
|
|
1758
|
+
alert(`Error: ${message}`);
|
|
1759
|
+
}
|
|
1760
|
+
|
|
1761
|
+
generateConsolidatedReportCard(reportId, data, filename) {
|
|
1762
|
+
const metadata = data.metadata || {};
|
|
1763
|
+
const evaluations = data.evaluations || [];
|
|
1764
|
+
const fullPath = filename || 'consolidated_evaluations_report.json';
|
|
1765
|
+
|
|
1766
|
+
// Calculate unique models count from metadata.evaluation_files
|
|
1767
|
+
// Filter out files in subdirectories (those with / or \ in path)
|
|
1768
|
+
const evaluationFiles = metadata.evaluation_files || [];
|
|
1769
|
+
const uniqueModelsCount = evaluationFiles.filter(file => {
|
|
1770
|
+
const path = file.file_path || '';
|
|
1771
|
+
return !path.includes('/') && !path.includes('\\');
|
|
1772
|
+
}).length;
|
|
1773
|
+
|
|
1774
|
+
// Group evaluations by model to combine results from different test sets
|
|
1775
|
+
const modelGroups = {};
|
|
1776
|
+
evaluations.forEach(evalData => {
|
|
1777
|
+
// For consolidated reports, only process main evaluation entries to avoid double counting
|
|
1778
|
+
if (!this.isMainEvaluationEntry(evalData)) {
|
|
1779
|
+
return; // Skip individual meeting files
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
// Extract base model name (remove test set prefix like "standup_meeting.")
|
|
1783
|
+
let modelName = evalData.experiment_name.replace('.experiment', '');
|
|
1784
|
+
modelName = modelName.replace(/^[^.]+\./, ''); // Remove any prefix before first dot
|
|
1785
|
+
if (modelName.includes('.')) {
|
|
1786
|
+
// If still has dots, it's likely the original name with prefix
|
|
1787
|
+
modelName = evalData.experiment_name.replace('.experiment', '').replace('standup_meeting.', '');
|
|
1788
|
+
}
|
|
1789
|
+
|
|
1790
|
+
if (!modelGroups[modelName]) {
|
|
1791
|
+
modelGroups[modelName] = {
|
|
1792
|
+
modelName: modelName,
|
|
1793
|
+
evaluations: [],
|
|
1794
|
+
totalScore: 0,
|
|
1795
|
+
totalCost: 0,
|
|
1796
|
+
totalTokens: 0,
|
|
1797
|
+
excellentCount: 0,
|
|
1798
|
+
goodCount: 0,
|
|
1799
|
+
fairCount: 0,
|
|
1800
|
+
poorCount: 0,
|
|
1801
|
+
// Q&A specific metrics
|
|
1802
|
+
totalAccuracy: 0,
|
|
1803
|
+
totalPassRate: 0,
|
|
1804
|
+
totalQuestions: 0,
|
|
1805
|
+
totalPassed: 0,
|
|
1806
|
+
totalFailed: 0,
|
|
1807
|
+
isQAEvaluation: false,
|
|
1808
|
+
testSets: [],
|
|
1809
|
+
totalInferenceInputTokens: 0,
|
|
1810
|
+
totalInferenceOutputTokens: 0,
|
|
1811
|
+
totalInferenceTokens: 0
|
|
1812
|
+
};
|
|
1813
|
+
}
|
|
1814
|
+
|
|
1815
|
+
const group = modelGroups[modelName];
|
|
1816
|
+
group.evaluations.push(evalData);
|
|
1817
|
+
|
|
1818
|
+
// Track which test set this is from
|
|
1819
|
+
const testSetName = evalData.experiment_name.includes('standup_meeting') ? 'meetings' : 'general';
|
|
1820
|
+
if (!group.testSets.includes(testSetName)) {
|
|
1821
|
+
group.testSets.push(testSetName);
|
|
1822
|
+
}
|
|
1823
|
+
|
|
1824
|
+
// Accumulate metrics - check if this is a Q&A or summarization evaluation
|
|
1825
|
+
const metrics = evalData.overall_rating?.metrics || {};
|
|
1826
|
+
|
|
1827
|
+
if (metrics.accuracy_percentage !== undefined) {
|
|
1828
|
+
// Q&A evaluation
|
|
1829
|
+
group.isQAEvaluation = true;
|
|
1830
|
+
group.totalAccuracy += metrics.accuracy_percentage || 0;
|
|
1831
|
+
group.totalPassRate += metrics.pass_rate || 0;
|
|
1832
|
+
group.totalQuestions += metrics.num_questions || 0;
|
|
1833
|
+
group.totalPassed += metrics.num_passed || 0;
|
|
1834
|
+
group.totalFailed += metrics.num_failed || 0;
|
|
1835
|
+
} else {
|
|
1836
|
+
// Summarization evaluation
|
|
1837
|
+
group.totalScore += metrics.quality_score || 0;
|
|
1838
|
+
group.excellentCount += metrics.excellent_count || 0;
|
|
1839
|
+
group.goodCount += metrics.good_count || 0;
|
|
1840
|
+
group.fairCount += metrics.fair_count || 0;
|
|
1841
|
+
group.poorCount += metrics.poor_count || 0;
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1844
|
+
// Use inference cost from consolidated report (actual model cost, not evaluation cost)
|
|
1845
|
+
let experimentCost = 0;
|
|
1846
|
+
if (evalData.inference_cost) {
|
|
1847
|
+
experimentCost = evalData.inference_cost.total_cost || 0;
|
|
1848
|
+
}
|
|
1849
|
+
group.totalCost += experimentCost;
|
|
1850
|
+
|
|
1851
|
+
// Only accumulate inference tokens (actual model usage for generation)
|
|
1852
|
+
// Evaluation tokens are tracked separately and should not be mixed
|
|
1853
|
+
const tokensToAccumulate = evalData.inference_usage?.total_tokens || 0;
|
|
1854
|
+
group.totalTokens += tokensToAccumulate;
|
|
1855
|
+
|
|
1856
|
+
// Accumulate inference token usage
|
|
1857
|
+
if (evalData.inference_usage) {
|
|
1858
|
+
group.totalInferenceInputTokens += evalData.inference_usage.input_tokens || 0;
|
|
1859
|
+
group.totalInferenceOutputTokens += evalData.inference_usage.output_tokens || 0;
|
|
1860
|
+
group.totalInferenceTokens += evalData.inference_usage.total_tokens || 0;
|
|
1861
|
+
}
|
|
1862
|
+
});
|
|
1863
|
+
|
|
1864
|
+
// Convert groups to consolidated evaluations
|
|
1865
|
+
const consolidatedEvaluations = Object.values(modelGroups).map(group => {
|
|
1866
|
+
let avgScore = 0;
|
|
1867
|
+
let overallRating = 'poor';
|
|
1868
|
+
let metricsData = {};
|
|
1869
|
+
|
|
1870
|
+
if (group.isQAEvaluation) {
|
|
1871
|
+
// Q&A evaluation - use accuracy as score
|
|
1872
|
+
const numEvals = group.evaluations.length;
|
|
1873
|
+
avgScore = numEvals > 0 ? group.totalAccuracy / numEvals : 0;
|
|
1874
|
+
const avgPassRate = numEvals > 0 ? group.totalPassRate / numEvals : 0;
|
|
1875
|
+
|
|
1876
|
+
// Determine rating based on accuracy
|
|
1877
|
+
if (avgScore >= 90) overallRating = 'excellent';
|
|
1878
|
+
else if (avgScore >= 75) overallRating = 'good';
|
|
1879
|
+
else if (avgScore >= 60) overallRating = 'fair';
|
|
1880
|
+
|
|
1881
|
+
metricsData = {
|
|
1882
|
+
accuracy_percentage: avgScore,
|
|
1883
|
+
pass_rate: avgPassRate,
|
|
1884
|
+
num_questions: group.totalQuestions,
|
|
1885
|
+
num_passed: group.totalPassed,
|
|
1886
|
+
num_failed: group.totalFailed
|
|
1887
|
+
};
|
|
1888
|
+
} else {
|
|
1889
|
+
// Summarization evaluation
|
|
1890
|
+
const totalRatings = group.excellentCount + group.goodCount + group.fairCount + group.poorCount;
|
|
1891
|
+
|
|
1892
|
+
// Properly calculate quality score from aggregated rating counts using the same formula as Python
|
|
1893
|
+
if (totalRatings > 0) {
|
|
1894
|
+
avgScore = ((group.excellentCount * 4 + group.goodCount * 3 + group.fairCount * 2 + group.poorCount * 1) / totalRatings - 1) * 100 / 3;
|
|
1895
|
+
}
|
|
1896
|
+
|
|
1897
|
+
// Determine overall rating based on average score
|
|
1898
|
+
if (avgScore >= 85) overallRating = 'excellent';
|
|
1899
|
+
else if (avgScore >= 70) overallRating = 'good';
|
|
1900
|
+
else if (avgScore >= 50) overallRating = 'fair';
|
|
1901
|
+
|
|
1902
|
+
metricsData = {
|
|
1903
|
+
quality_score: avgScore,
|
|
1904
|
+
excellent_count: group.excellentCount,
|
|
1905
|
+
good_count: group.goodCount,
|
|
1906
|
+
fair_count: group.fairCount,
|
|
1907
|
+
poor_count: group.poorCount,
|
|
1908
|
+
total_summaries: totalRatings
|
|
1909
|
+
};
|
|
1910
|
+
}
|
|
1911
|
+
|
|
1912
|
+
// Calculate average latency across all evaluations for this model
|
|
1913
|
+
// Use avg_processing_time_seconds from evaluation data
|
|
1914
|
+
// Filter out failed experiments (those with very low processing times and no tokens)
|
|
1915
|
+
let avgLatency = 0;
|
|
1916
|
+
let latencyCount = 0;
|
|
1917
|
+
group.evaluations.forEach(evalData => {
|
|
1918
|
+
if (evalData.avg_processing_time_seconds) {
|
|
1919
|
+
// Filter out failed experiments that have:
|
|
1920
|
+
// - Very low processing time (< 1 second) AND
|
|
1921
|
+
// - No inference tokens (indicating failed inference) AND
|
|
1922
|
+
// - Unknown inference type
|
|
1923
|
+
const isFailedExperiment = (
|
|
1924
|
+
evalData.avg_processing_time_seconds < 1.0 &&
|
|
1925
|
+
(!evalData.inference_usage || evalData.inference_usage.total_tokens === 0) &&
|
|
1926
|
+
evalData.inference_type === 'unknown'
|
|
1927
|
+
);
|
|
1928
|
+
|
|
1929
|
+
if (!isFailedExperiment) {
|
|
1930
|
+
avgLatency += evalData.avg_processing_time_seconds;
|
|
1931
|
+
latencyCount++;
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
});
|
|
1935
|
+
if (latencyCount > 0) {
|
|
1936
|
+
avgLatency = avgLatency / latencyCount;
|
|
1937
|
+
}
|
|
1938
|
+
|
|
1939
|
+
return {
|
|
1940
|
+
experiment_name: group.modelName,
|
|
1941
|
+
test_sets: group.testSets.join(', '),
|
|
1942
|
+
num_evaluations: group.evaluations.length,
|
|
1943
|
+
overall_rating: {
|
|
1944
|
+
rating: overallRating,
|
|
1945
|
+
metrics: metricsData
|
|
1946
|
+
},
|
|
1947
|
+
cost: { total_cost: group.totalCost },
|
|
1948
|
+
usage: { total_tokens: group.totalTokens },
|
|
1949
|
+
// Pass through inference cost for display
|
|
1950
|
+
inference_cost: { total_cost: group.totalCost },
|
|
1951
|
+
// Pass through aggregated inference usage for token chart
|
|
1952
|
+
inference_usage: {
|
|
1953
|
+
input_tokens: group.totalInferenceInputTokens,
|
|
1954
|
+
output_tokens: group.totalInferenceOutputTokens,
|
|
1955
|
+
total_tokens: group.totalInferenceTokens
|
|
1956
|
+
},
|
|
1957
|
+
// Pass through average processing time in seconds
|
|
1958
|
+
avg_processing_time_seconds: avgLatency,
|
|
1959
|
+
// Aggregate aspect summaries from all evaluations
|
|
1960
|
+
aspect_summary: group.evaluations[0]?.aspect_summary || {}
|
|
1961
|
+
};
|
|
1962
|
+
});
|
|
1963
|
+
|
|
1964
|
+
// Sort consolidated evaluations by score (quality_score for summarization, accuracy_percentage for Q&A)
|
|
1965
|
+
const sortedEvaluations = consolidatedEvaluations.sort((a, b) => {
|
|
1966
|
+
const scoreA = a.overall_rating?.metrics?.quality_score || a.overall_rating?.metrics?.accuracy_percentage || 0;
|
|
1967
|
+
const scoreB = b.overall_rating?.metrics?.quality_score || b.overall_rating?.metrics?.accuracy_percentage || 0;
|
|
1968
|
+
return scoreB - scoreA;
|
|
1969
|
+
});
|
|
1970
|
+
|
|
1971
|
+
// Generate comparison table
|
|
1972
|
+
const tableHtml = this.generateComparisonTable(sortedEvaluations);
|
|
1973
|
+
|
|
1974
|
+
// Generate charts
|
|
1975
|
+
const chartsHtml = this.generateComparisonCharts(sortedEvaluations);
|
|
1976
|
+
|
|
1977
|
+
// Generate summary statistics with consolidated data
|
|
1978
|
+
// Pass the original evaluations array to get meeting types from individual meeting files
|
|
1979
|
+
const summaryHtml = this.generateConsolidatedSummary(metadata, evaluations);
|
|
1980
|
+
|
|
1981
|
+
// Generate aspect breakdown with consolidated data
|
|
1982
|
+
const aspectBreakdownHtml = this.generateAspectBreakdown(consolidatedEvaluations);
|
|
1983
|
+
|
|
1984
|
+
return `
|
|
1985
|
+
<div class="report-card consolidated-report" data-report-id="${reportId}">
|
|
1986
|
+
<div class="report-header">
|
|
1987
|
+
<h3 title="${fullPath}">📊 Consolidated Evaluation Report</h3>
|
|
1988
|
+
<div class="meta">
|
|
1989
|
+
${uniqueModelsCount} Unique Models | ${metadata.total_evaluations} Total Evaluations |
|
|
1990
|
+
${metadata.timestamp || 'N/A'}
|
|
1991
|
+
</div>
|
|
1992
|
+
<div class="report-actions">
|
|
1993
|
+
<div class="export-dropdown">
|
|
1994
|
+
<button class="export-btn" title="Export report">
|
|
1995
|
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
|
1996
|
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
|
1997
|
+
<polyline points="7 10 12 15 17 10"/>
|
|
1998
|
+
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
1999
|
+
</svg>
|
|
2000
|
+
</button>
|
|
2001
|
+
<div class="export-menu">
|
|
2002
|
+
<button class="export-option" data-format="png" data-report-id="${reportId}">📷 Export as PNG</button>
|
|
2003
|
+
<button class="export-option" data-format="pdf" data-report-id="${reportId}">📄 Export as PDF</button>
|
|
2004
|
+
</div>
|
|
2005
|
+
</div>
|
|
2006
|
+
<button class="report-close" data-report-id="${reportId}">×</button>
|
|
2007
|
+
</div>
|
|
2008
|
+
</div>
|
|
2009
|
+
<div class="report-content">
|
|
2010
|
+
${summaryHtml}
|
|
2011
|
+
${chartsHtml}
|
|
2012
|
+
${tableHtml}
|
|
2013
|
+
${aspectBreakdownHtml}
|
|
2014
|
+
</div>
|
|
2015
|
+
</div>
|
|
2016
|
+
`;
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
generateConsolidatedSummary(metadata, evaluations) {
|
|
2020
|
+
// Use pre-calculated metadata values from the consolidated report
|
|
2021
|
+
const totalCost = metadata.total_cost?.total_cost || 0;
|
|
2022
|
+
const totalTokens = metadata.total_usage?.total_tokens || 0;
|
|
2023
|
+
const inputTokens = metadata.total_usage?.input_tokens || 0;
|
|
2024
|
+
const outputTokens = metadata.total_usage?.output_tokens || 0;
|
|
2025
|
+
|
|
2026
|
+
// Calculate aggregate statistics from main evaluation entries only (to avoid double counting)
|
|
2027
|
+
let excellentCount = 0, goodCount = 0, fairCount = 0, poorCount = 0;
|
|
2028
|
+
let cloudCount = 0, localCount = 0;
|
|
2029
|
+
const meetingTypes = new Set();
|
|
2030
|
+
const uniqueModelNames = new Set();
|
|
2031
|
+
|
|
2032
|
+
evaluations.forEach(evalData => {
|
|
2033
|
+
// Only process main evaluation entries for overall quality metrics to avoid double counting
|
|
2034
|
+
if (this.isMainEvaluationEntry(evalData)) {
|
|
2035
|
+
const metrics = evalData.overall_rating?.metrics || {};
|
|
2036
|
+
excellentCount += metrics.excellent_count || 0;
|
|
2037
|
+
goodCount += metrics.good_count || 0;
|
|
2038
|
+
fairCount += metrics.fair_count || 0;
|
|
2039
|
+
poorCount += metrics.poor_count || 0;
|
|
2040
|
+
|
|
2041
|
+
// Use tested_model field, but fall back to experiment_name if unknown
|
|
2042
|
+
let modelName = evalData.tested_model || 'unknown';
|
|
2043
|
+
if (modelName === 'unknown') {
|
|
2044
|
+
// Fall back to experiment_name and clean it up
|
|
2045
|
+
modelName = evalData.experiment_name.replace('.experiment', '');
|
|
2046
|
+
}
|
|
2047
|
+
|
|
2048
|
+
// Only count if this is a new unique model (avoid double counting)
|
|
2049
|
+
if (!uniqueModelNames.has(modelName)) {
|
|
2050
|
+
uniqueModelNames.add(modelName);
|
|
2051
|
+
|
|
2052
|
+
// Count cloud vs local models
|
|
2053
|
+
// Support both new format (tested_model_inference) and old format (inference from name)
|
|
2054
|
+
const isCloud = evalData.tested_model_inference === 'cloud' ||
|
|
2055
|
+
evalData.tested_model_type === 'anthropic' ||
|
|
2056
|
+
modelName.toLowerCase().includes('claude') ||
|
|
2057
|
+
modelName.toLowerCase().includes('gpt-4') ||
|
|
2058
|
+
modelName.toLowerCase().includes('gemini');
|
|
2059
|
+
if (isCloud) {
|
|
2060
|
+
cloudCount++;
|
|
2061
|
+
} else {
|
|
2062
|
+
localCount++;
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
|
|
2067
|
+
// Extract meeting types from ALL evaluation entries (including individual meeting files)
|
|
2068
|
+
const expName = evalData.experiment_name || '';
|
|
2069
|
+
if (expName.includes('.')) {
|
|
2070
|
+
const meetingName = expName.split('.')[0];
|
|
2071
|
+
// Clean up meeting type name - only count actual meetings, not metadata
|
|
2072
|
+
if (meetingName.includes('_meeting')) {
|
|
2073
|
+
// Extract base meeting type (e.g., "standup_meeting" from "standup_meeting.Model")
|
|
2074
|
+
const baseType = meetingName.replace(/_\d+$/, ''); // Remove numeric suffix if present
|
|
2075
|
+
if (baseType !== 'transcript_metadata') {
|
|
2076
|
+
meetingTypes.add(baseType);
|
|
2077
|
+
}
|
|
2078
|
+
}
|
|
2079
|
+
// Note: transcript_metadata is excluded as it's not a meeting type
|
|
2080
|
+
}
|
|
2081
|
+
});
|
|
2082
|
+
|
|
2083
|
+
const totalRatings = excellentCount + goodCount + fairCount + poorCount;
|
|
2084
|
+
const uniqueModels = uniqueModelNames.size;
|
|
2085
|
+
const costPerSummary = totalRatings > 0 ? (totalCost / totalRatings).toFixed(3) : 0;
|
|
2086
|
+
|
|
2087
|
+
return `
|
|
2088
|
+
<div class="consolidated-summary enhanced">
|
|
2089
|
+
<h4>📈 Overall Summary</h4>
|
|
2090
|
+
|
|
2091
|
+
<!-- Primary Statistics -->
|
|
2092
|
+
<div class="summary-grid primary-stats">
|
|
2093
|
+
<div class="summary-card">
|
|
2094
|
+
<div class="summary-value">${uniqueModels || 0}</div>
|
|
2095
|
+
<div class="summary-label" data-tooltip="Number of unique AI models tested">Models Evaluated</div>
|
|
2096
|
+
<div class="summary-subcaption">${cloudCount} Cloud, ${localCount} Local</div>
|
|
2097
|
+
</div>
|
|
2098
|
+
<div class="summary-card">
|
|
2099
|
+
<div class="summary-value">${totalRatings}</div>
|
|
2100
|
+
<div class="summary-label" data-tooltip="Total summaries evaluated">Total Summaries</div>
|
|
2101
|
+
<div class="summary-subcaption">Across ${meetingTypes.size} meeting types</div>
|
|
2102
|
+
</div>
|
|
2103
|
+
<div class="summary-card">
|
|
2104
|
+
<div class="summary-value">$${totalCost.toFixed(4)}</div>
|
|
2105
|
+
<div class="summary-label" data-tooltip="Claude evaluation cost">Evaluation Cost</div>
|
|
2106
|
+
<div class="summary-subcaption">$${costPerSummary}/summary</div>
|
|
2107
|
+
</div>
|
|
2108
|
+
<div class="summary-card">
|
|
2109
|
+
<div class="summary-value">${(totalTokens / 1000).toFixed(1)}K</div>
|
|
2110
|
+
<div class="summary-label" data-tooltip="Total tokens processed (input + output). Note: Input tokens represent content sent to the model; models may apply prompt caching which reduces processing time and costs for repeated content without affecting this count.">Tokens Used</div>
|
|
2111
|
+
<div class="summary-subcaption">${(inputTokens/1000).toFixed(0)}K in, ${(outputTokens/1000).toFixed(0)}K out</div>
|
|
2112
|
+
</div>
|
|
2113
|
+
</div>
|
|
2114
|
+
</div>
|
|
2115
|
+
`;
|
|
2116
|
+
}
|
|
2117
|
+
|
|
2118
|
+
generateComparisonTable(evaluations) {
|
|
2119
|
+
// NOTE: This generates the "Model Performance Comparison" table which shows the Score.
|
|
2120
|
+
// The Score here is the same as the Score shown in the "Model Performance Summary" table.
|
|
2121
|
+
// For summarization: Score = quality_score = ((E×4 + G×3 + F×2 + P×1) / Total - 1) / 3 × 100
|
|
2122
|
+
// For Q&A: Score = accuracy_percentage (pass rate)
|
|
2123
|
+
// The Performance column shows the rating counts (E, G, F, P) used to calculate the Score.
|
|
2124
|
+
|
|
2125
|
+
let tableRows = '';
|
|
2126
|
+
|
|
2127
|
+
evaluations.forEach((evalData, index) => {
|
|
2128
|
+
// For consolidated reports, only process main evaluation entries to avoid double counting
|
|
2129
|
+
if (!this.isMainEvaluationEntry(evalData)) {
|
|
2130
|
+
return; // Skip individual meeting files
|
|
2131
|
+
}
|
|
2132
|
+
|
|
2133
|
+
const rating = evalData.overall_rating || {};
|
|
2134
|
+
const metrics = rating.metrics || {};
|
|
2135
|
+
|
|
2136
|
+
// Check if this is Q&A (has accuracy_percentage) or summarization (has quality_score)
|
|
2137
|
+
const isQA = metrics.accuracy_percentage !== undefined;
|
|
2138
|
+
const score = isQA ? metrics.accuracy_percentage : (metrics.quality_score || 0);
|
|
2139
|
+
// Only use inference cost and tokens (actual model usage for generation)
|
|
2140
|
+
// Do NOT mix with evaluation/analysis metrics
|
|
2141
|
+
const cost = evalData.inference_cost?.total_cost || 0;
|
|
2142
|
+
const tokens = evalData.inference_usage?.total_tokens || 0;
|
|
2143
|
+
|
|
2144
|
+
// Extract model name from experiment name
|
|
2145
|
+
let fullModelName = evalData.experiment_name.replace('.experiment', '');
|
|
2146
|
+
fullModelName = fullModelName.replace('standup_meeting.', '');
|
|
2147
|
+
|
|
2148
|
+
// Create display name (truncated if needed)
|
|
2149
|
+
let displayName = fullModelName;
|
|
2150
|
+
if (displayName.length > 50) {
|
|
2151
|
+
displayName = displayName.substring(0, 47) + '...';
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
// Determine if it's a local or cloud model
|
|
2155
|
+
// Check inference_cost to determine if it's local (cost = 0) or cloud
|
|
2156
|
+
const inferenceType = evalData.inference_type || '';
|
|
2157
|
+
const inferenceCost = evalData.inference_cost?.total_cost || 0;
|
|
2158
|
+
const isLocal = inferenceType === 'local' ||
|
|
2159
|
+
fullModelName.toLowerCase().includes('lemonade') ||
|
|
2160
|
+
(inferenceCost === 0 && !fullModelName.toLowerCase().includes('claude'));
|
|
2161
|
+
|
|
2162
|
+
// Add test sets indicator if this is combined data
|
|
2163
|
+
const testSetsIndicator = evalData.test_sets ? ` (${evalData.test_sets})` : '';
|
|
2164
|
+
const numEvals = evalData.num_evaluations || 1;
|
|
2165
|
+
|
|
2166
|
+
tableRows += `
|
|
2167
|
+
<tr class="quality-row-${rating.rating}">
|
|
2168
|
+
<td class="rank-cell">${index + 1}</td>
|
|
2169
|
+
<td class="model-cell">
|
|
2170
|
+
<div class="model-name-wrapper" title="${fullModelName}${testSetsIndicator}">
|
|
2171
|
+
<span class="model-name">${displayName}</span>
|
|
2172
|
+
${isLocal ? '<span class="badge-local" data-tooltip="Model runs locally on your machine - no API costs">LOCAL</span>' : '<span class="badge-cloud" data-tooltip="Model runs on cloud servers - usage-based pricing">CLOUD</span>'}
|
|
2173
|
+
${numEvals > 1 ? `<span class="badge-count" title="Combined from ${numEvals} evaluations">${numEvals}×</span>` : ''}
|
|
2174
|
+
</div>
|
|
2175
|
+
</td>
|
|
2176
|
+
<td class="score-cell">
|
|
2177
|
+
<div class="score-bar-container">
|
|
2178
|
+
<div class="score-bar" style="width: ${score}%"></div>
|
|
2179
|
+
<span class="score-text">${Math.round(score)}%</span>
|
|
2180
|
+
</div>
|
|
2181
|
+
</td>
|
|
2182
|
+
<td class="rating-cell">
|
|
2183
|
+
<span class="quality-rating rating-${rating.rating}">${rating.rating}</span>
|
|
2184
|
+
</td>
|
|
2185
|
+
<td class="distribution-cell">
|
|
2186
|
+
<div class="mini-distribution">
|
|
2187
|
+
${isQA ? `
|
|
2188
|
+
<span class="mini-count excellent" data-tooltip="Pass Rate: ${((metrics.pass_rate || 0) * 100).toFixed(1)}%">✓ ${metrics.num_passed || 0}</span>
|
|
2189
|
+
<span class="mini-count poor" data-tooltip="Failed Questions">✗ ${metrics.num_failed || 0}</span>
|
|
2190
|
+
<span class="mini-count" data-tooltip="Total Questions">Σ ${metrics.num_questions || 0}</span>
|
|
2191
|
+
` : `
|
|
2192
|
+
<span class="mini-count excellent" data-tooltip="${metrics.excellent_count || 0} summaries rated Excellent (comprehensive & accurate)">${metrics.excellent_count || 0}</span>
|
|
2193
|
+
<span class="mini-count good" data-tooltip="${metrics.good_count || 0} summaries rated Good (mostly accurate, minor issues)">${metrics.good_count || 0}</span>
|
|
2194
|
+
<span class="mini-count fair" data-tooltip="${metrics.fair_count || 0} summaries rated Fair (acceptable but missing details)">${metrics.fair_count || 0}</span>
|
|
2195
|
+
<span class="mini-count poor" data-tooltip="${metrics.poor_count || 0} summaries rated Poor (significant errors or omissions)">${metrics.poor_count || 0}</span>
|
|
2196
|
+
`}
|
|
2197
|
+
</div>
|
|
2198
|
+
</td>
|
|
2199
|
+
<td class="cost-cell">
|
|
2200
|
+
${isLocal ? '<span style="color: #28a745;">FREE</span>' :
|
|
2201
|
+
(cost > 0 ? '$' + cost.toFixed(4) : '<span style="color: #28a745;">FREE</span>')}
|
|
2202
|
+
</td>
|
|
2203
|
+
<td class="tokens-cell">${(tokens / 1000).toFixed(1)}K</td>
|
|
2204
|
+
</tr>
|
|
2205
|
+
`;
|
|
2206
|
+
});
|
|
2207
|
+
|
|
2208
|
+
return `
|
|
2209
|
+
<div class="comparison-table-section">
|
|
2210
|
+
<h4>🏆 Model Performance Comparison</h4>
|
|
2211
|
+
<div class="table-container">
|
|
2212
|
+
<table class="comparison-table">
|
|
2213
|
+
<thead>
|
|
2214
|
+
<tr>
|
|
2215
|
+
<th class="rank-header" data-tooltip="Model ranking based on Score">Rank</th>
|
|
2216
|
+
<th class="model-header" data-tooltip="AI model name and type (LOCAL runs on your machine, CLOUD runs remotely)">Model</th>
|
|
2217
|
+
<th class="score-header" data-tooltip="Quality score (0-100%): Calculated from performance rating counts using formula ((E×4 + G×3 + F×2 + P×1) / Total - 1) / 3 × 100. For Q&A tasks, shows accuracy percentage instead. See Model Performance Summary table below for detailed breakdown.">Score</th>
|
|
2218
|
+
<th class="rating-header" data-tooltip="Overall rating (Excellent/Good/Fair/Poor)">Rating</th>
|
|
2219
|
+
<th class="distribution-header" data-tooltip="Performance breakdown: Excellent, Good, Fair, Poor counts used to calculate Score above">Performance</th>
|
|
2220
|
+
<th class="cost-header" data-tooltip="Inference cost (FREE for local models)">Cost</th>
|
|
2221
|
+
<th class="tokens-header" data-tooltip="Number of tokens processed (1K = 1,000 tokens)">Tokens</th>
|
|
2222
|
+
</tr>
|
|
2223
|
+
</thead>
|
|
2224
|
+
<tbody>
|
|
2225
|
+
${tableRows}
|
|
2226
|
+
</tbody>
|
|
2227
|
+
</table>
|
|
2228
|
+
</div>
|
|
2229
|
+
</div>
|
|
2230
|
+
`;
|
|
2231
|
+
}
|
|
2232
|
+
|
|
2233
|
+
generateComparisonCharts(evaluations) {
|
|
2234
|
+
|
|
2235
|
+
// Prepare data for charts
|
|
2236
|
+
const labels = [];
|
|
2237
|
+
const scores = [];
|
|
2238
|
+
const costs = [];
|
|
2239
|
+
const tokens = [];
|
|
2240
|
+
const latencies = [];
|
|
2241
|
+
const inputTokens = [];
|
|
2242
|
+
const outputTokens = [];
|
|
2243
|
+
const totalTokens = [];
|
|
2244
|
+
|
|
2245
|
+
evaluations.forEach(evalData => {
|
|
2246
|
+
// For consolidated reports, only process main evaluation entries to avoid double counting
|
|
2247
|
+
if (!this.isMainEvaluationEntry(evalData)) {
|
|
2248
|
+
return; // Skip individual meeting files
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
let fullModelName = evalData.experiment_name.replace('.experiment', '');
|
|
2252
|
+
fullModelName = fullModelName.replace('standup_meeting.', '');
|
|
2253
|
+
|
|
2254
|
+
// Create a shorter label for display
|
|
2255
|
+
let shortName = fullModelName;
|
|
2256
|
+
if (fullModelName.includes('Lemonade-')) {
|
|
2257
|
+
shortName = fullModelName.replace('Lemonade-', '');
|
|
2258
|
+
}
|
|
2259
|
+
if (fullModelName.includes('Claude-')) {
|
|
2260
|
+
shortName = fullModelName.replace('Claude-', 'Claude ');
|
|
2261
|
+
}
|
|
2262
|
+
if (shortName.includes('-Basic-Summary')) {
|
|
2263
|
+
shortName = shortName.replace('-Basic-Summary', '');
|
|
2264
|
+
}
|
|
2265
|
+
// Handle Llama model names
|
|
2266
|
+
if (shortName.includes('Llama-3.2-3B-Instruct-Hybrid')) {
|
|
2267
|
+
shortName = 'Llama-3.2-3B-Instruct';
|
|
2268
|
+
}
|
|
2269
|
+
if (shortName.includes('LFM2-1.2B-Basic-Summary')) {
|
|
2270
|
+
shortName = 'LFM2-1.2B';
|
|
2271
|
+
}
|
|
2272
|
+
|
|
2273
|
+
// Further shorten if still too long
|
|
2274
|
+
if (shortName.length > 30) {
|
|
2275
|
+
shortName = shortName.substring(0, 27) + '...';
|
|
2276
|
+
}
|
|
2277
|
+
|
|
2278
|
+
// Use inference cost from consolidated report (actual model cost, not evaluation cost)
|
|
2279
|
+
let experimentCost = 0;
|
|
2280
|
+
if (evalData.inference_cost) {
|
|
2281
|
+
experimentCost = evalData.inference_cost.total_cost || 0;
|
|
2282
|
+
}
|
|
2283
|
+
|
|
2284
|
+
// Extract latency data from avg_processing_time_seconds in consolidated report
|
|
2285
|
+
let avgLatency = evalData.avg_processing_time_seconds || 0;
|
|
2286
|
+
|
|
2287
|
+
// Extract token usage data (use inference_usage for actual model usage)
|
|
2288
|
+
const inferenceUsage = evalData.inference_usage || {};
|
|
2289
|
+
const inputToks = inferenceUsage.input_tokens || 0;
|
|
2290
|
+
const outputToks = inferenceUsage.output_tokens || 0;
|
|
2291
|
+
const totalToks = inferenceUsage.total_tokens || 0;
|
|
2292
|
+
|
|
2293
|
+
labels.push({ short: shortName, full: fullModelName });
|
|
2294
|
+
const metrics = evalData.overall_rating?.metrics || {};
|
|
2295
|
+
const score = metrics.accuracy_percentage !== undefined ? metrics.accuracy_percentage : (metrics.quality_score || 0);
|
|
2296
|
+
scores.push(score);
|
|
2297
|
+
costs.push(experimentCost);
|
|
2298
|
+
// Only use inference tokens for the leaderboard (actual model usage for generation)
|
|
2299
|
+
// Do NOT fallback to evaluation tokens as they are completely different
|
|
2300
|
+
tokens.push(totalToks / 1000); // in K
|
|
2301
|
+
latencies.push(avgLatency);
|
|
2302
|
+
inputTokens.push(inputToks);
|
|
2303
|
+
outputTokens.push(outputToks);
|
|
2304
|
+
totalTokens.push(totalToks);
|
|
2305
|
+
});
|
|
2306
|
+
|
|
2307
|
+
// Find max values for scaling
|
|
2308
|
+
const maxScore = 100;
|
|
2309
|
+
const maxCost = Math.max(...costs) * 1.1 || 1;
|
|
2310
|
+
const maxTokens = Math.max(...totalTokens) * 1.1 || 1;
|
|
2311
|
+
const maxLatency = Math.max(...latencies) * 1.1 || 1;
|
|
2312
|
+
|
|
2313
|
+
// Generate bar charts using CSS
|
|
2314
|
+
const scoreChartBars = labels.map((labelObj, i) => {
|
|
2315
|
+
const height = (scores[i] / maxScore) * 100;
|
|
2316
|
+
const rating = evaluations[i].overall_rating?.rating || 'fair';
|
|
2317
|
+
return `
|
|
2318
|
+
<div class="chart-bar-group">
|
|
2319
|
+
<div class="chart-bar-container">
|
|
2320
|
+
<div class="chart-bar quality-${rating}" style="height: ${height}%; position: relative;"
|
|
2321
|
+
data-tooltip="Grade: ${Math.round(scores[i])}%">
|
|
2322
|
+
<span class="bar-value-top percentage-value">${Math.round(scores[i])}%</span>
|
|
2323
|
+
</div>
|
|
2324
|
+
</div>
|
|
2325
|
+
<div class="chart-label" title="${labelObj.full}">${labelObj.short}</div>
|
|
2326
|
+
</div>
|
|
2327
|
+
`;
|
|
2328
|
+
}).join('');
|
|
2329
|
+
|
|
2330
|
+
const costChartBars = labels.map((labelObj, i) => {
|
|
2331
|
+
const isFree = costs[i] === 0;
|
|
2332
|
+
const height = isFree ? 5 : (costs[i] / maxCost) * 100; // Min height for free models
|
|
2333
|
+
const costDisplay = isFree ? 'FREE' : '$' + costs[i].toFixed(4);
|
|
2334
|
+
return `
|
|
2335
|
+
<div class="chart-bar-group">
|
|
2336
|
+
<div class="chart-bar-container">
|
|
2337
|
+
<div class="chart-bar cost-bar ${isFree ? 'free-model' : ''}" style="height: ${height}%; position: relative;"
|
|
2338
|
+
data-tooltip="Inference Cost: ${costDisplay}${isFree ? ' (Local Model)' : ' (Cloud API)'}">
|
|
2339
|
+
<span class="bar-value-top" style="${isFree ? 'color: #28a745;' : ''}">${costDisplay}</span>
|
|
2340
|
+
</div>
|
|
2341
|
+
</div>
|
|
2342
|
+
<div class="chart-label" title="${labelObj.full}">${labelObj.short}</div>
|
|
2343
|
+
</div>
|
|
2344
|
+
`;
|
|
2345
|
+
}).join('');
|
|
2346
|
+
|
|
2347
|
+
const latencyChartBars = labels.map((labelObj, i) => {
|
|
2348
|
+
const latency = latencies[i];
|
|
2349
|
+
const height = latency > 0 ? (latency / maxLatency) * 100 : 0;
|
|
2350
|
+
|
|
2351
|
+
// Format latency display
|
|
2352
|
+
let latencyDisplay = 'N/A';
|
|
2353
|
+
if (latency > 0) {
|
|
2354
|
+
if (latency < 1) {
|
|
2355
|
+
latencyDisplay = `${(latency * 1000).toFixed(0)}ms`;
|
|
2356
|
+
} else if (latency < 10) {
|
|
2357
|
+
latencyDisplay = `${latency.toFixed(2)}s`;
|
|
2358
|
+
} else {
|
|
2359
|
+
latencyDisplay = `${latency.toFixed(1)}s`;
|
|
2360
|
+
}
|
|
2361
|
+
}
|
|
2362
|
+
|
|
2363
|
+
return `
|
|
2364
|
+
<div class="chart-bar-group">
|
|
2365
|
+
<div class="chart-bar-container">
|
|
2366
|
+
<div class="chart-bar latency-bar" style="height: ${height}%; position: relative;"
|
|
2367
|
+
data-tooltip="Average inference time: ${latencyDisplay}">
|
|
2368
|
+
<span class="bar-value-top">${latencyDisplay}</span>
|
|
2369
|
+
</div>
|
|
2370
|
+
</div>
|
|
2371
|
+
<div class="chart-label" title="${labelObj.full}">${labelObj.short}</div>
|
|
2372
|
+
</div>
|
|
2373
|
+
`;
|
|
2374
|
+
}).join('');
|
|
2375
|
+
|
|
2376
|
+
// Generate stacked token usage chart
|
|
2377
|
+
const tokenChartBars = labels.map((labelObj, i) => {
|
|
2378
|
+
const inputHeight = (inputTokens[i] / maxTokens) * 100;
|
|
2379
|
+
const outputHeight = (outputTokens[i] / maxTokens) * 100;
|
|
2380
|
+
const totalHeight = (totalTokens[i] / maxTokens) * 100;
|
|
2381
|
+
|
|
2382
|
+
// Format token display
|
|
2383
|
+
const formatTokens = (num) => {
|
|
2384
|
+
if (num >= 1000) {
|
|
2385
|
+
return `${(num / 1000).toFixed(1)}K`;
|
|
2386
|
+
}
|
|
2387
|
+
return num.toString();
|
|
2388
|
+
};
|
|
2389
|
+
|
|
2390
|
+
const inputPercentage = totalTokens[i] > 0 ? ((inputTokens[i] / totalTokens[i]) * 100).toFixed(1) : 0;
|
|
2391
|
+
const outputPercentage = totalTokens[i] > 0 ? ((outputTokens[i] / totalTokens[i]) * 100).toFixed(1) : 0;
|
|
2392
|
+
|
|
2393
|
+
return `
|
|
2394
|
+
<div class="chart-bar-group">
|
|
2395
|
+
<div class="chart-bar-container">
|
|
2396
|
+
<div class="stacked-bar-wrapper" style="height: ${totalHeight}%; position: relative;">
|
|
2397
|
+
<span class="bar-value-top">${formatTokens(totalTokens[i])}</span>
|
|
2398
|
+
<div class="stacked-bar input-tokens"
|
|
2399
|
+
style="height: ${(inputHeight / totalHeight) * 100}%"
|
|
2400
|
+
data-tooltip="Input Tokens: ${formatTokens(inputTokens[i])} (${inputPercentage}% of total). Note: Models may use prompt caching to reduce processing time and costs for repeated content.">
|
|
2401
|
+
</div>
|
|
2402
|
+
<div class="stacked-bar output-tokens"
|
|
2403
|
+
style="height: ${(outputHeight / totalHeight) * 100}%"
|
|
2404
|
+
data-tooltip="Output Tokens: ${formatTokens(outputTokens[i])} (${outputPercentage}% of total)">
|
|
2405
|
+
</div>
|
|
2406
|
+
</div>
|
|
2407
|
+
</div>
|
|
2408
|
+
<div class="chart-label" title="${labelObj.full}">${labelObj.short}</div>
|
|
2409
|
+
</div>
|
|
2410
|
+
`;
|
|
2411
|
+
}).join('');
|
|
2412
|
+
|
|
2413
|
+
return `
|
|
2414
|
+
<div class="charts-section-vertical">
|
|
2415
|
+
<div class="chart-container-full">
|
|
2416
|
+
<h5>📊 Grade Comparison</h5>
|
|
2417
|
+
<div class="bar-chart quality-chart">
|
|
2418
|
+
${scoreChartBars}
|
|
2419
|
+
</div>
|
|
2420
|
+
</div>
|
|
2421
|
+
<div class="chart-container-full">
|
|
2422
|
+
<h5>⏱️ Latency Comparison</h5>
|
|
2423
|
+
<div class="bar-chart latency-chart">
|
|
2424
|
+
${latencyChartBars}
|
|
2425
|
+
</div>
|
|
2426
|
+
</div>
|
|
2427
|
+
<div class="chart-container-full">
|
|
2428
|
+
<h5>📈 Token Usage Comparison</h5>
|
|
2429
|
+
<div class="token-legend">
|
|
2430
|
+
<span class="legend-item"><span class="legend-color input-color"></span> <span data-tooltip="Input tokens processed. Note: Actual tokens processed may differ due to prompt caching, which can significantly reduce repeated content processing time and costs.">Input Tokens ℹ️</span></span>
|
|
2431
|
+
<span class="legend-item"><span class="legend-color output-color"></span> Output Tokens</span>
|
|
2432
|
+
</div>
|
|
2433
|
+
<div class="bar-chart token-chart">
|
|
2434
|
+
${tokenChartBars}
|
|
2435
|
+
</div>
|
|
2436
|
+
</div>
|
|
2437
|
+
<div class="chart-container-full">
|
|
2438
|
+
<h5>💰 Cost Comparison</h5>
|
|
2439
|
+
<div class="bar-chart cost-chart">
|
|
2440
|
+
${costChartBars}
|
|
2441
|
+
</div>
|
|
2442
|
+
</div>
|
|
2443
|
+
</div>
|
|
2444
|
+
`;
|
|
2445
|
+
}
|
|
2446
|
+
|
|
2447
|
+
generateAspectBreakdown(evaluations) {
|
|
2448
|
+
// Define the quality aspects we're tracking
|
|
2449
|
+
const aspects = [
|
|
2450
|
+
{ key: 'executive_summary_quality', label: 'Executive Summary', icon: '📋', tooltip: 'Quality of high-level summary and key takeaways' },
|
|
2451
|
+
{ key: 'detail_completeness', label: 'Detail Completeness', icon: '📝', tooltip: 'How well important details are captured and preserved' },
|
|
2452
|
+
{ key: 'action_items_structure', label: 'Action Items', icon: '✅', tooltip: 'Identification and clarity of action items and next steps' },
|
|
2453
|
+
{ key: 'key_decisions_clarity', label: 'Key Decisions', icon: '🎯', tooltip: 'Recognition and documentation of key decisions made' },
|
|
2454
|
+
{ key: 'participant_information', label: 'Participant Info', icon: '👥', tooltip: 'Quality of identifying participants and their contributions' },
|
|
2455
|
+
{ key: 'topic_organization', label: 'Topic Organization', icon: '📂', tooltip: 'Logical structure and organization of topics and themes' }
|
|
2456
|
+
];
|
|
2457
|
+
|
|
2458
|
+
// Collect aspect data from evaluations
|
|
2459
|
+
const aspectData = {};
|
|
2460
|
+
const modelScores = {};
|
|
2461
|
+
const modelGrades = {}; // Store grade scores for each model
|
|
2462
|
+
|
|
2463
|
+
evaluations.forEach(evalData => {
|
|
2464
|
+
// For consolidated reports, only process main evaluation entries to avoid double counting
|
|
2465
|
+
if (!this.isMainEvaluationEntry(evalData)) {
|
|
2466
|
+
return; // Skip individual meeting files
|
|
2467
|
+
}
|
|
2468
|
+
|
|
2469
|
+
// For consolidated reports, the actual evaluation data might be nested
|
|
2470
|
+
let modelName = evalData.tested_model || evalData.experiment_name || evalData.model || 'Unknown';
|
|
2471
|
+
modelName = modelName.replace('.experiment', '').replace('standup_meeting.', '');
|
|
2472
|
+
|
|
2473
|
+
// Get shortened display name
|
|
2474
|
+
let displayName = modelName;
|
|
2475
|
+
if (displayName.includes('Lemonade-')) {
|
|
2476
|
+
displayName = displayName.replace('Lemonade-', '');
|
|
2477
|
+
}
|
|
2478
|
+
if (displayName.includes('Claude-')) {
|
|
2479
|
+
displayName = displayName.replace('Claude-', 'Claude ');
|
|
2480
|
+
}
|
|
2481
|
+
if (displayName.includes('-Basic-Summary')) {
|
|
2482
|
+
displayName = displayName.replace('-Basic-Summary', '');
|
|
2483
|
+
}
|
|
2484
|
+
|
|
2485
|
+
modelScores[displayName] = {};
|
|
2486
|
+
|
|
2487
|
+
// Store the score (accuracy for Q&A, quality score for summarization) for this model
|
|
2488
|
+
if (evalData.overall_rating && evalData.overall_rating.metrics) {
|
|
2489
|
+
const metrics = evalData.overall_rating.metrics;
|
|
2490
|
+
const score = metrics.accuracy_percentage !== undefined ? metrics.accuracy_percentage : (metrics.quality_score || 0);
|
|
2491
|
+
modelGrades[displayName] = score;
|
|
2492
|
+
}
|
|
2493
|
+
|
|
2494
|
+
// Check if this evaluation has aspect_summary data (from consolidated report)
|
|
2495
|
+
if (evalData.aspect_summary) {
|
|
2496
|
+
aspects.forEach(aspect => {
|
|
2497
|
+
const aspectSummary = evalData.aspect_summary[aspect.key];
|
|
2498
|
+
if (aspectSummary && aspectSummary.most_common_rating) {
|
|
2499
|
+
const modeRating = aspectSummary.most_common_rating;
|
|
2500
|
+
modelScores[displayName][aspect.key] = modeRating;
|
|
2501
|
+
|
|
2502
|
+
// Track overall aspect performance using distribution
|
|
2503
|
+
if (!aspectData[aspect.key]) {
|
|
2504
|
+
aspectData[aspect.key] = { excellent: 0, good: 0, fair: 0, poor: 0 };
|
|
2505
|
+
}
|
|
2506
|
+
|
|
2507
|
+
// Add all ratings from the distribution
|
|
2508
|
+
const distribution = aspectSummary.rating_distribution || {};
|
|
2509
|
+
Object.entries(distribution).forEach(([rating, count]) => {
|
|
2510
|
+
if (aspectData[aspect.key][rating] !== undefined) {
|
|
2511
|
+
aspectData[aspect.key][rating] += count;
|
|
2512
|
+
}
|
|
2513
|
+
});
|
|
2514
|
+
}
|
|
2515
|
+
});
|
|
2516
|
+
}
|
|
2517
|
+
// Fallback: Check if this evaluation has per_question data (individual reports)
|
|
2518
|
+
else if (evalData.per_question && evalData.per_question.length > 0) {
|
|
2519
|
+
// Aggregate scores across all questions for this model
|
|
2520
|
+
const questionScores = {};
|
|
2521
|
+
|
|
2522
|
+
evalData.per_question.forEach(question => {
|
|
2523
|
+
if (question.analysis) {
|
|
2524
|
+
aspects.forEach(aspect => {
|
|
2525
|
+
const aspectResult = question.analysis[aspect.key];
|
|
2526
|
+
if (aspectResult && aspectResult.rating) {
|
|
2527
|
+
if (!questionScores[aspect.key]) {
|
|
2528
|
+
questionScores[aspect.key] = [];
|
|
2529
|
+
}
|
|
2530
|
+
questionScores[aspect.key].push(aspectResult.rating);
|
|
2531
|
+
}
|
|
2532
|
+
});
|
|
2533
|
+
}
|
|
2534
|
+
});
|
|
2535
|
+
|
|
2536
|
+
// Calculate mode (most common rating) for each aspect
|
|
2537
|
+
aspects.forEach(aspect => {
|
|
2538
|
+
if (questionScores[aspect.key] && questionScores[aspect.key].length > 0) {
|
|
2539
|
+
const ratings = questionScores[aspect.key];
|
|
2540
|
+
const ratingCounts = {};
|
|
2541
|
+
ratings.forEach(r => {
|
|
2542
|
+
ratingCounts[r] = (ratingCounts[r] || 0) + 1;
|
|
2543
|
+
});
|
|
2544
|
+
// Find most common rating
|
|
2545
|
+
let maxCount = 0;
|
|
2546
|
+
let modeRating = 'fair';
|
|
2547
|
+
Object.entries(ratingCounts).forEach(([rating, count]) => {
|
|
2548
|
+
if (count > maxCount) {
|
|
2549
|
+
maxCount = count;
|
|
2550
|
+
modeRating = rating;
|
|
2551
|
+
}
|
|
2552
|
+
});
|
|
2553
|
+
modelScores[displayName][aspect.key] = modeRating;
|
|
2554
|
+
|
|
2555
|
+
// Track overall aspect performance
|
|
2556
|
+
if (!aspectData[aspect.key]) {
|
|
2557
|
+
aspectData[aspect.key] = { excellent: 0, good: 0, fair: 0, poor: 0 };
|
|
2558
|
+
}
|
|
2559
|
+
aspectData[aspect.key][modeRating]++;
|
|
2560
|
+
}
|
|
2561
|
+
});
|
|
2562
|
+
}
|
|
2563
|
+
});
|
|
2564
|
+
|
|
2565
|
+
// Generate the aspect breakdown visualization
|
|
2566
|
+
let aspectRows = '';
|
|
2567
|
+
aspects.forEach(aspect => {
|
|
2568
|
+
const data = aspectData[aspect.key] || { excellent: 0, good: 0, fair: 0, poor: 0 };
|
|
2569
|
+
const total = data.excellent + data.good + data.fair + data.poor;
|
|
2570
|
+
|
|
2571
|
+
if (total > 0) {
|
|
2572
|
+
// Group models by their rating for this aspect
|
|
2573
|
+
const modelsByRating = {
|
|
2574
|
+
excellent: [],
|
|
2575
|
+
good: [],
|
|
2576
|
+
fair: [],
|
|
2577
|
+
poor: []
|
|
2578
|
+
};
|
|
2579
|
+
|
|
2580
|
+
Object.entries(modelScores).forEach(([model, scores]) => {
|
|
2581
|
+
const rating = scores[aspect.key];
|
|
2582
|
+
if (rating && modelsByRating[rating]) {
|
|
2583
|
+
modelsByRating[rating].push(model);
|
|
2584
|
+
}
|
|
2585
|
+
});
|
|
2586
|
+
|
|
2587
|
+
// Create detailed tooltips for each rating level
|
|
2588
|
+
const excellentTooltip = data.excellent > 0 ?
|
|
2589
|
+
`Excellent (${data.excellent} ${data.excellent === 1 ? 'summary' : 'summaries'}):
|
|
2590
|
+
|
|
2591
|
+
Models with excellent ${aspect.label.toLowerCase()}:
|
|
2592
|
+
• ${modelsByRating.excellent.join('\n• ')}
|
|
2593
|
+
|
|
2594
|
+
These models excel at ${aspect.tooltip.toLowerCase()}` : '';
|
|
2595
|
+
|
|
2596
|
+
const goodTooltip = data.good > 0 ?
|
|
2597
|
+
`Good (${data.good} ${data.good === 1 ? 'summary' : 'summaries'}):
|
|
2598
|
+
|
|
2599
|
+
Models with good ${aspect.label.toLowerCase()}:
|
|
2600
|
+
• ${modelsByRating.good.join('\n• ')}
|
|
2601
|
+
|
|
2602
|
+
These models perform well at ${aspect.tooltip.toLowerCase()}` : '';
|
|
2603
|
+
|
|
2604
|
+
const fairTooltip = data.fair > 0 ?
|
|
2605
|
+
`Fair (${data.fair} ${data.fair === 1 ? 'summary' : 'summaries'}):
|
|
2606
|
+
|
|
2607
|
+
Models with fair ${aspect.label.toLowerCase()}:
|
|
2608
|
+
• ${modelsByRating.fair.join('\n• ')}
|
|
2609
|
+
|
|
2610
|
+
These models need improvement at ${aspect.tooltip.toLowerCase()}` : '';
|
|
2611
|
+
|
|
2612
|
+
const poorTooltip = data.poor > 0 ?
|
|
2613
|
+
`Poor (${data.poor} ${data.poor === 1 ? 'summary' : 'summaries'}):
|
|
2614
|
+
|
|
2615
|
+
Models with poor ${aspect.label.toLowerCase()}:
|
|
2616
|
+
• ${modelsByRating.poor.join('\n• ')}
|
|
2617
|
+
|
|
2618
|
+
These models struggle with ${aspect.tooltip.toLowerCase()}` : '';
|
|
2619
|
+
|
|
2620
|
+
aspectRows += `
|
|
2621
|
+
<div class="aspect-row">
|
|
2622
|
+
<div class="aspect-header">
|
|
2623
|
+
<span class="aspect-icon">${aspect.icon}</span>
|
|
2624
|
+
<span class="aspect-label" data-tooltip="${aspect.tooltip}">${aspect.label}</span>
|
|
2625
|
+
</div>
|
|
2626
|
+
<div class="aspect-distribution">
|
|
2627
|
+
${data.excellent > 0 ? `<div class="aspect-bar excellent"
|
|
2628
|
+
style="width: ${(data.excellent/total*100)}%"
|
|
2629
|
+
data-tooltip="${excellentTooltip}">
|
|
2630
|
+
${data.excellent}
|
|
2631
|
+
</div>` : ''}
|
|
2632
|
+
${data.good > 0 ? `<div class="aspect-bar good"
|
|
2633
|
+
style="width: ${(data.good/total*100)}%"
|
|
2634
|
+
data-tooltip="${goodTooltip}">
|
|
2635
|
+
${data.good}
|
|
2636
|
+
</div>` : ''}
|
|
2637
|
+
${data.fair > 0 ? `<div class="aspect-bar fair"
|
|
2638
|
+
style="width: ${(data.fair/total*100)}%"
|
|
2639
|
+
data-tooltip="${fairTooltip}">
|
|
2640
|
+
${data.fair}
|
|
2641
|
+
</div>` : ''}
|
|
2642
|
+
${data.poor > 0 ? `<div class="aspect-bar poor"
|
|
2643
|
+
style="width: ${(data.poor/total*100)}%"
|
|
2644
|
+
data-tooltip="${poorTooltip}">
|
|
2645
|
+
${data.poor}
|
|
2646
|
+
</div>` : ''}
|
|
2647
|
+
</div>
|
|
2648
|
+
</div>
|
|
2649
|
+
`;
|
|
2650
|
+
}
|
|
2651
|
+
});
|
|
2652
|
+
|
|
2653
|
+
// Create model-aspect matrix with clean table format (Model Performance Summary table)
|
|
2654
|
+
// NOTE: The Score column here is calculated the same way as in the Model Performance Comparison table above.
|
|
2655
|
+
// Both tables use the same formula: Score = ((E×4 + G×3 + F×2 + P×1) / Total - 1) / 3 × 100
|
|
2656
|
+
// The counts E, G, F, P shown in the Performance column of the Comparison table are used here.
|
|
2657
|
+
let matrixHtml = '';
|
|
2658
|
+
if (Object.keys(modelScores).length > 0) {
|
|
2659
|
+
// Create table header with Score column
|
|
2660
|
+
let headerRow = '<tr><th class="model-header">Model</th>';
|
|
2661
|
+
headerRow += '<th class="grade-header" data-tooltip="Quality score: ((E×4 + G×3 + F×2 + P×1) / Total - 1) / 3 × 100. Normalizes 1-4 scale to 0-100%. Excellent=100%, Good=67%, Fair=33%, Poor=0%">Score ℹ️</th>'; // Add Score column with tooltip
|
|
2662
|
+
aspects.forEach(aspect => {
|
|
2663
|
+
headerRow += `<th class="aspect-header" data-tooltip="${aspect.tooltip}">${aspect.label.replace(' Quality', '').replace(' Structure', '').replace(' Information', '')}</th>`;
|
|
2664
|
+
});
|
|
2665
|
+
headerRow += '</tr>';
|
|
2666
|
+
|
|
2667
|
+
// Create table rows
|
|
2668
|
+
let tableRows = '';
|
|
2669
|
+
Object.entries(modelScores).forEach(([model, scores]) => {
|
|
2670
|
+
tableRows += `<tr><td class="model-name">${model}</td>`;
|
|
2671
|
+
|
|
2672
|
+
// Add score cell with detailed calculation
|
|
2673
|
+
const score = modelGrades[model] || 0;
|
|
2674
|
+
const scoreClass = score >= 85 ? 'cell-excellent' :
|
|
2675
|
+
score >= 70 ? 'cell-good' :
|
|
2676
|
+
score >= 50 ? 'cell-fair' : 'cell-poor';
|
|
2677
|
+
|
|
2678
|
+
// Find the evaluation data for this model to get rating counts
|
|
2679
|
+
const evalForModel = evaluations.find(e => {
|
|
2680
|
+
const expName = e.experiment_name || '';
|
|
2681
|
+
return expName.includes(model.split(' ')[0]);
|
|
2682
|
+
});
|
|
2683
|
+
const metrics = evalForModel?.overall_rating?.metrics || {};
|
|
2684
|
+
const exc = metrics.excellent_count || 0;
|
|
2685
|
+
const good = metrics.good_count || 0;
|
|
2686
|
+
const fair = metrics.fair_count || 0;
|
|
2687
|
+
const poor = metrics.poor_count || 0;
|
|
2688
|
+
const total = exc + good + fair + poor;
|
|
2689
|
+
|
|
2690
|
+
// Calculate raw score and show actual formula
|
|
2691
|
+
const rawScore = total > 0 ? (exc * 4 + good * 3 + fair * 2 + poor * 1) / total : 0;
|
|
2692
|
+
const tooltip = `Calculation: ((E:${exc}×4 + G:${good}×3 + F:${fair}×2 + P:${poor}×1) / ${total} - 1) / 3 × 100 = ((${rawScore.toFixed(2)} - 1) / 3) × 100 = ${Math.round(score)}%`;
|
|
2693
|
+
tableRows += `<td class="${scoreClass} grade-cell" title="${tooltip}">${Math.round(score)}%</td>`;
|
|
2694
|
+
|
|
2695
|
+
// Add aspect rating cells
|
|
2696
|
+
aspects.forEach(aspect => {
|
|
2697
|
+
const rating = scores[aspect.key] || 'unknown';
|
|
2698
|
+
const ratingClass = `cell-${rating}`;
|
|
2699
|
+
tableRows += `<td class="${ratingClass}" title="${aspect.label}: ${rating}">${rating}</td>`;
|
|
2700
|
+
});
|
|
2701
|
+
tableRows += '</tr>';
|
|
2702
|
+
});
|
|
2703
|
+
|
|
2704
|
+
// Build score calculation details as collapsible section
|
|
2705
|
+
let scoreDetails = `
|
|
2706
|
+
<div class="grade-calculation-details">
|
|
2707
|
+
<div class="grade-calc-header" onclick="this.parentElement.classList.toggle('expanded')">
|
|
2708
|
+
<h6>📐 Score Calculation Formula</h6>
|
|
2709
|
+
<span class="toggle-icon">▶</span>
|
|
2710
|
+
</div>
|
|
2711
|
+
<div class="grade-calc-content">
|
|
2712
|
+
<div class="formula-explanation">
|
|
2713
|
+
<code>Score = ((E×4 + G×3 + F×2 + P×1) / Total - 1) / 3 × 100</code>
|
|
2714
|
+
<div class="formula-legend">
|
|
2715
|
+
<span><strong>E</strong> = Excellent count</span>
|
|
2716
|
+
<span><strong>G</strong> = Good count</span>
|
|
2717
|
+
<span><strong>F</strong> = Fair count</span>
|
|
2718
|
+
<span><strong>P</strong> = Poor count</span>
|
|
2719
|
+
<span><strong>Total</strong> = E + G + F + P</span>
|
|
2720
|
+
</div>
|
|
2721
|
+
<div class="formula-note">
|
|
2722
|
+
<p><strong>Why this formula?</strong> Evaluations use a 1-4 rating scale (Poor=1, Fair=2, Good=3, Excellent=4). To convert to a 0-100% score, we calculate the average rating, subtract 1 (making it 0-3), divide by 3 (normalizing to 0-1), then multiply by 100.</p>
|
|
2723
|
+
<p><strong>Result:</strong> Excellent=100%, Good=67%, Fair=33%, Poor=0%</p>
|
|
2724
|
+
<p><strong>Note:</strong> This Score is the same as the "Score" column shown in the Model Performance Comparison table above, which is computed from the same performance rating counts (Excellent, Good, Fair, Poor).</p>
|
|
2725
|
+
</div>
|
|
2726
|
+
</div>
|
|
2727
|
+
`;
|
|
2728
|
+
|
|
2729
|
+
// Add calculation for each model
|
|
2730
|
+
Object.entries(modelScores).forEach(([model, scores]) => {
|
|
2731
|
+
const evalForModel = evaluations.find(e => {
|
|
2732
|
+
const expName = e.experiment_name || '';
|
|
2733
|
+
return expName.includes(model.split(' ')[0]);
|
|
2734
|
+
});
|
|
2735
|
+
const metrics = evalForModel?.overall_rating?.metrics || {};
|
|
2736
|
+
const exc = metrics.excellent_count || 0;
|
|
2737
|
+
const good = metrics.good_count || 0;
|
|
2738
|
+
const fair = metrics.fair_count || 0;
|
|
2739
|
+
const poor = metrics.poor_count || 0;
|
|
2740
|
+
const total = exc + good + fair + poor;
|
|
2741
|
+
const score = modelGrades[model] || 0;
|
|
2742
|
+
|
|
2743
|
+
if (total > 0) {
|
|
2744
|
+
const rawScore = (exc * 4 + good * 3 + fair * 2 + poor * 1) / total;
|
|
2745
|
+
const normalized = (rawScore - 1) / 3 * 100;
|
|
2746
|
+
scoreDetails += `
|
|
2747
|
+
<div class="grade-calc-item">
|
|
2748
|
+
<div class="model-calc-header"><strong>${model}</strong></div>
|
|
2749
|
+
<div class="calc-step">
|
|
2750
|
+
<span class="step-label">With actual data:</span>
|
|
2751
|
+
<code>((E:${exc}×4 + G:${good}×3 + F:${fair}×2 + P:${poor}×1) / ${total} - 1) / 3 × 100</code>
|
|
2752
|
+
</div>
|
|
2753
|
+
<div class="calc-step">
|
|
2754
|
+
<span class="step-label">Simplified:</span>
|
|
2755
|
+
<code>((${rawScore.toFixed(2)} - 1) / 3) × 100</code>
|
|
2756
|
+
= <code>${normalized.toFixed(2)}%</code>
|
|
2757
|
+
≈ <strong>${Math.round(score)}%</strong>
|
|
2758
|
+
</div>
|
|
2759
|
+
</div>
|
|
2760
|
+
`;
|
|
2761
|
+
}
|
|
2762
|
+
});
|
|
2763
|
+
|
|
2764
|
+
scoreDetails += `
|
|
2765
|
+
</div>
|
|
2766
|
+
</div>
|
|
2767
|
+
`;
|
|
2768
|
+
|
|
2769
|
+
matrixHtml = `
|
|
2770
|
+
<div class="clean-matrix-container">
|
|
2771
|
+
<h5>🎯 Model Performance Summary</h5>
|
|
2772
|
+
<table class="clean-performance-matrix">
|
|
2773
|
+
<thead>${headerRow}</thead>
|
|
2774
|
+
<tbody>${tableRows}</tbody>
|
|
2775
|
+
</table>
|
|
2776
|
+
${scoreDetails}
|
|
2777
|
+
</div>
|
|
2778
|
+
`;
|
|
2779
|
+
}
|
|
2780
|
+
|
|
2781
|
+
return aspectRows ? `
|
|
2782
|
+
<div class="aspect-breakdown-section">
|
|
2783
|
+
<h4>🔍 Quality Aspect Analysis</h4>
|
|
2784
|
+
<div class="aspect-breakdown">
|
|
2785
|
+
${aspectRows}
|
|
2786
|
+
</div>
|
|
2787
|
+
${matrixHtml}
|
|
2788
|
+
</div>
|
|
2789
|
+
` : '';
|
|
2790
|
+
}
|
|
2791
|
+
|
|
2792
|
+
showMessage(message) {
|
|
2793
|
+
// Simple message display
|
|
2794
|
+
console.log(message);
|
|
2795
|
+
const msg = document.createElement('div');
|
|
2796
|
+
msg.textContent = message;
|
|
2797
|
+
msg.style.cssText = `
|
|
2798
|
+
position: fixed;
|
|
2799
|
+
top: 20px;
|
|
2800
|
+
right: 20px;
|
|
2801
|
+
background: #28a745;
|
|
2802
|
+
color: white;
|
|
2803
|
+
padding: 10px 20px;
|
|
2804
|
+
border-radius: 5px;
|
|
2805
|
+
z-index: 1000;
|
|
2806
|
+
`;
|
|
2807
|
+
document.body.appendChild(msg);
|
|
2808
|
+
setTimeout(() => msg.remove(), 3000);
|
|
2809
|
+
}
|
|
2810
|
+
|
|
2811
|
+
showProgress(message) {
|
|
2812
|
+
const progress = document.createElement('div');
|
|
2813
|
+
progress.className = 'export-progress';
|
|
2814
|
+
progress.innerHTML = `
|
|
2815
|
+
<h3>${message}</h3>
|
|
2816
|
+
<div class="export-spinner"></div>
|
|
2817
|
+
`;
|
|
2818
|
+
document.body.appendChild(progress);
|
|
2819
|
+
return progress;
|
|
2820
|
+
}
|
|
2821
|
+
|
|
2822
|
+
async exportReportAsPNG(reportId) {
|
|
2823
|
+
const progress = this.showProgress('Generating PNG...');
|
|
2824
|
+
|
|
2825
|
+
try {
|
|
2826
|
+
// Get the specific report card
|
|
2827
|
+
const element = document.querySelector(`.report-card[data-report-id="${reportId}"]`);
|
|
2828
|
+
|
|
2829
|
+
if (!element) {
|
|
2830
|
+
throw new Error('Report not found');
|
|
2831
|
+
}
|
|
2832
|
+
|
|
2833
|
+
// Clone the element to manipulate it without affecting the display
|
|
2834
|
+
const clonedElement = element.cloneNode(true);
|
|
2835
|
+
|
|
2836
|
+
// Add export-ready class to ensure proper styling
|
|
2837
|
+
clonedElement.classList.add('export-ready');
|
|
2838
|
+
|
|
2839
|
+
// Create a temporary container off-screen
|
|
2840
|
+
const tempContainer = document.createElement('div');
|
|
2841
|
+
tempContainer.style.cssText = `
|
|
2842
|
+
position: absolute;
|
|
2843
|
+
left: -9999px;
|
|
2844
|
+
top: 0;
|
|
2845
|
+
width: ${Math.max(element.scrollWidth, 1600)}px;
|
|
2846
|
+
background: white;
|
|
2847
|
+
`;
|
|
2848
|
+
document.body.appendChild(tempContainer);
|
|
2849
|
+
tempContainer.appendChild(clonedElement);
|
|
2850
|
+
|
|
2851
|
+
// Expand all collapsible sections in the clone
|
|
2852
|
+
const collapsibleContents = clonedElement.querySelectorAll('.collapsible-content');
|
|
2853
|
+
const collapsibleToggles = clonedElement.querySelectorAll('.collapsible-toggle');
|
|
2854
|
+
|
|
2855
|
+
collapsibleContents.forEach(content => {
|
|
2856
|
+
content.classList.add('expanded');
|
|
2857
|
+
content.style.maxHeight = 'none';
|
|
2858
|
+
content.style.overflow = 'visible';
|
|
2859
|
+
});
|
|
2860
|
+
|
|
2861
|
+
collapsibleToggles.forEach(toggle => {
|
|
2862
|
+
toggle.classList.add('expanded');
|
|
2863
|
+
toggle.textContent = '▼';
|
|
2864
|
+
});
|
|
2865
|
+
|
|
2866
|
+
// Ensure all content is visible
|
|
2867
|
+
clonedElement.style.overflow = 'visible';
|
|
2868
|
+
clonedElement.style.width = '100%';
|
|
2869
|
+
clonedElement.style.maxWidth = 'none';
|
|
2870
|
+
|
|
2871
|
+
const reportContent = clonedElement.querySelector('.report-content');
|
|
2872
|
+
if (reportContent) {
|
|
2873
|
+
reportContent.style.overflow = 'visible';
|
|
2874
|
+
reportContent.style.maxWidth = 'none';
|
|
2875
|
+
}
|
|
2876
|
+
|
|
2877
|
+
// Ensure tables and charts are fully visible
|
|
2878
|
+
const tables = clonedElement.querySelectorAll('table');
|
|
2879
|
+
tables.forEach(table => {
|
|
2880
|
+
table.style.width = '100%';
|
|
2881
|
+
table.style.maxWidth = 'none';
|
|
2882
|
+
});
|
|
2883
|
+
|
|
2884
|
+
const chartContainers = clonedElement.querySelectorAll('.chart-container, .chart-container-full, .charts-section-vertical');
|
|
2885
|
+
chartContainers.forEach(container => {
|
|
2886
|
+
container.style.overflow = 'visible';
|
|
2887
|
+
container.style.maxWidth = 'none';
|
|
2888
|
+
});
|
|
2889
|
+
|
|
2890
|
+
// Wait a bit for any dynamic content to render
|
|
2891
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
2892
|
+
|
|
2893
|
+
// Force layout recalculation
|
|
2894
|
+
clonedElement.offsetHeight;
|
|
2895
|
+
|
|
2896
|
+
// Get actual dimensions after all styles are applied
|
|
2897
|
+
const actualWidth = Math.max(clonedElement.scrollWidth, clonedElement.offsetWidth, 1600);
|
|
2898
|
+
const actualHeight = clonedElement.scrollHeight;
|
|
2899
|
+
|
|
2900
|
+
// Use html2canvas to capture the cloned element
|
|
2901
|
+
const canvas = await html2canvas(clonedElement, {
|
|
2902
|
+
scale: 2, // Higher quality
|
|
2903
|
+
logging: false,
|
|
2904
|
+
backgroundColor: '#ffffff',
|
|
2905
|
+
width: actualWidth,
|
|
2906
|
+
height: actualHeight,
|
|
2907
|
+
windowWidth: actualWidth,
|
|
2908
|
+
windowHeight: actualHeight,
|
|
2909
|
+
useCORS: true,
|
|
2910
|
+
allowTaint: true,
|
|
2911
|
+
scrollX: 0,
|
|
2912
|
+
scrollY: 0
|
|
2913
|
+
});
|
|
2914
|
+
|
|
2915
|
+
// Clean up the temporary container
|
|
2916
|
+
document.body.removeChild(tempContainer);
|
|
2917
|
+
|
|
2918
|
+
// Convert to blob and download
|
|
2919
|
+
canvas.toBlob((blob) => {
|
|
2920
|
+
const url = URL.createObjectURL(blob);
|
|
2921
|
+
const link = document.createElement('a');
|
|
2922
|
+
link.download = `gaia-report-${reportId}-${new Date().toISOString().slice(0,10)}.png`;
|
|
2923
|
+
link.href = url;
|
|
2924
|
+
link.click();
|
|
2925
|
+
URL.revokeObjectURL(url);
|
|
2926
|
+
|
|
2927
|
+
progress.remove();
|
|
2928
|
+
this.showMessage('PNG exported successfully!');
|
|
2929
|
+
});
|
|
2930
|
+
} catch (error) {
|
|
2931
|
+
console.error('Failed to export PNG:', error);
|
|
2932
|
+
progress.remove();
|
|
2933
|
+
this.showError(`Failed to export PNG: ${error.message}`);
|
|
2934
|
+
}
|
|
2935
|
+
}
|
|
2936
|
+
|
|
2937
|
+
async exportReportAsPDF(reportId) {
|
|
2938
|
+
const progress = this.showProgress('Generating PDF...');
|
|
2939
|
+
|
|
2940
|
+
try {
|
|
2941
|
+
// Get the specific report card
|
|
2942
|
+
const element = document.querySelector(`.report-card[data-report-id="${reportId}"]`);
|
|
2943
|
+
|
|
2944
|
+
if (!element) {
|
|
2945
|
+
throw new Error('Report not found');
|
|
2946
|
+
}
|
|
2947
|
+
|
|
2948
|
+
// Initialize jsPDF
|
|
2949
|
+
const { jsPDF } = window.jspdf;
|
|
2950
|
+
const pdf = new jsPDF('p', 'mm', 'a4');
|
|
2951
|
+
|
|
2952
|
+
// PDF dimensions
|
|
2953
|
+
const pdfWidth = 210; // A4 width in mm
|
|
2954
|
+
const pdfHeight = 297; // A4 height in mm
|
|
2955
|
+
const margin = 10; // mm margin
|
|
2956
|
+
const contentWidth = pdfWidth - (2 * margin);
|
|
2957
|
+
const contentHeight = pdfHeight - (2 * margin);
|
|
2958
|
+
|
|
2959
|
+
// Capture the entire report first
|
|
2960
|
+
const fullCanvas = await html2canvas(element, {
|
|
2961
|
+
scale: 2,
|
|
2962
|
+
logging: false,
|
|
2963
|
+
backgroundColor: '#ffffff',
|
|
2964
|
+
windowWidth: element.scrollWidth,
|
|
2965
|
+
windowHeight: element.scrollHeight,
|
|
2966
|
+
useCORS: true,
|
|
2967
|
+
allowTaint: true
|
|
2968
|
+
});
|
|
2969
|
+
|
|
2970
|
+
// Calculate total height needed
|
|
2971
|
+
const totalHeight = (fullCanvas.height * contentWidth) / fullCanvas.width;
|
|
2972
|
+
const pageCount = Math.ceil(totalHeight / contentHeight);
|
|
2973
|
+
|
|
2974
|
+
// Add pages with smart breaks
|
|
2975
|
+
for (let page = 0; page < pageCount; page++) {
|
|
2976
|
+
if (page > 0) {
|
|
2977
|
+
pdf.addPage();
|
|
2978
|
+
}
|
|
2979
|
+
|
|
2980
|
+
// Calculate the portion of the image to use for this page
|
|
2981
|
+
const sourceY = (page * contentHeight * fullCanvas.width) / contentWidth;
|
|
2982
|
+
const sourceHeight = Math.min(
|
|
2983
|
+
(contentHeight * fullCanvas.width) / contentWidth,
|
|
2984
|
+
fullCanvas.height - sourceY
|
|
2985
|
+
);
|
|
2986
|
+
|
|
2987
|
+
// Create a temporary canvas for this page's content
|
|
2988
|
+
const pageCanvas = document.createElement('canvas');
|
|
2989
|
+
const pageCtx = pageCanvas.getContext('2d');
|
|
2990
|
+
pageCanvas.width = fullCanvas.width;
|
|
2991
|
+
pageCanvas.height = sourceHeight;
|
|
2992
|
+
|
|
2993
|
+
// Draw the portion of the full canvas onto the page canvas
|
|
2994
|
+
pageCtx.drawImage(
|
|
2995
|
+
fullCanvas,
|
|
2996
|
+
0, sourceY, fullCanvas.width, sourceHeight,
|
|
2997
|
+
0, 0, fullCanvas.width, sourceHeight
|
|
2998
|
+
);
|
|
2999
|
+
|
|
3000
|
+
// Convert to image and add to PDF
|
|
3001
|
+
const imgData = pageCanvas.toDataURL('image/png');
|
|
3002
|
+
const imgHeight = (sourceHeight * contentWidth) / fullCanvas.width;
|
|
3003
|
+
pdf.addImage(imgData, 'PNG', margin, margin, contentWidth, imgHeight);
|
|
3004
|
+
}
|
|
3005
|
+
|
|
3006
|
+
// Save the PDF
|
|
3007
|
+
pdf.save(`gaia-report-${reportId}-${new Date().toISOString().slice(0,10)}.pdf`);
|
|
3008
|
+
|
|
3009
|
+
progress.remove();
|
|
3010
|
+
this.showMessage('PDF exported successfully!');
|
|
3011
|
+
} catch (error) {
|
|
3012
|
+
console.error('Failed to export PDF:', error);
|
|
3013
|
+
progress.remove();
|
|
3014
|
+
this.showError(`Failed to export PDF: ${error.message}`);
|
|
3015
|
+
}
|
|
3016
|
+
}
|
|
3017
|
+
|
|
3018
|
+
// Agent Output Helper Methods
|
|
3019
|
+
generateAgentSummarySection(summary) {
|
|
3020
|
+
const statusClass = summary.status === 'success' ? 'success' : 'error';
|
|
3021
|
+
const statusIcon = summary.status === 'success' ? '✅' : '❌';
|
|
3022
|
+
|
|
3023
|
+
return `
|
|
3024
|
+
<div class="section">
|
|
3025
|
+
<h4>📊 Execution Summary</h4>
|
|
3026
|
+
<div class="summary-status-banner ${statusClass}">
|
|
3027
|
+
<div class="status-icon">${statusIcon}</div>
|
|
3028
|
+
<div class="status-details">
|
|
3029
|
+
<div class="status-text">${summary.status.toUpperCase()}</div>
|
|
3030
|
+
<div class="status-result">${summary.result}</div>
|
|
3031
|
+
</div>
|
|
3032
|
+
</div>
|
|
3033
|
+
<div class="metrics-grid">
|
|
3034
|
+
<div class="metric">
|
|
3035
|
+
<span class="metric-label">Steps Taken</span>
|
|
3036
|
+
<span class="metric-value">${summary.steps_taken}</span>
|
|
3037
|
+
</div>
|
|
3038
|
+
<div class="metric">
|
|
3039
|
+
<span class="metric-label">Total Messages</span>
|
|
3040
|
+
<span class="metric-value">${summary.conversation_length}</span>
|
|
3041
|
+
</div>
|
|
3042
|
+
<div class="metric">
|
|
3043
|
+
<span class="metric-label">Tool Calls</span>
|
|
3044
|
+
<span class="metric-value">${summary.tool_calls_count}</span>
|
|
3045
|
+
</div>
|
|
3046
|
+
<div class="metric">
|
|
3047
|
+
<span class="metric-label">Error Count</span>
|
|
3048
|
+
<span class="metric-value ${summary.error_count > 0 ? 'error' : 'success'}">${summary.error_count}</span>
|
|
3049
|
+
</div>
|
|
3050
|
+
</div>
|
|
3051
|
+
</div>
|
|
3052
|
+
`;
|
|
3053
|
+
}
|
|
3054
|
+
|
|
3055
|
+
generateConversationFlowSection(conversation) {
|
|
3056
|
+
let flowHtml = '';
|
|
3057
|
+
|
|
3058
|
+
conversation.forEach((msg, index) => {
|
|
3059
|
+
let messageClass = '';
|
|
3060
|
+
let roleLabel = '';
|
|
3061
|
+
let content = '';
|
|
3062
|
+
|
|
3063
|
+
if (msg.role === 'user') {
|
|
3064
|
+
messageClass = 'user-message';
|
|
3065
|
+
roleLabel = 'User';
|
|
3066
|
+
content = `<div class="message-text">${this.escapeHtml(msg.content)}</div>`;
|
|
3067
|
+
} else if (msg.role === 'assistant') {
|
|
3068
|
+
messageClass = 'assistant-message';
|
|
3069
|
+
roleLabel = 'Assistant';
|
|
3070
|
+
if (typeof msg.content === 'object') {
|
|
3071
|
+
if (msg.content.thought && msg.content.goal) {
|
|
3072
|
+
content = `<div class="assistant-reasoning">`;
|
|
3073
|
+
content += `<div class="reasoning-item"><span class="reasoning-label">💭 Thought:</span> ${this.escapeHtml(msg.content.thought)}</div>`;
|
|
3074
|
+
content += `<div class="reasoning-item"><span class="reasoning-label">🎯 Goal:</span> ${this.escapeHtml(msg.content.goal)}</div>`;
|
|
3075
|
+
|
|
3076
|
+
if (msg.content.tool) {
|
|
3077
|
+
content += `<div class="tool-invocation">`;
|
|
3078
|
+
content += `<div class="tool-name-inline">🔧 ${msg.content.tool}</div>`;
|
|
3079
|
+
if (msg.content.tool_args) {
|
|
3080
|
+
content += `<pre class="tool-args-inline">${JSON.stringify(msg.content.tool_args, null, 2)}</pre>`;
|
|
3081
|
+
}
|
|
3082
|
+
content += `</div>`;
|
|
3083
|
+
}
|
|
3084
|
+
if (msg.content.plan) {
|
|
3085
|
+
content += `<details class="plan-details">`;
|
|
3086
|
+
content += `<summary>📋 Execution Plan</summary>`;
|
|
3087
|
+
content += `<pre class="plan-content">${JSON.stringify(msg.content.plan, null, 2)}</pre>`;
|
|
3088
|
+
content += `</details>`;
|
|
3089
|
+
}
|
|
3090
|
+
if (msg.content.answer) {
|
|
3091
|
+
content += `<div class="final-answer">`;
|
|
3092
|
+
content += `<span class="answer-label">✅ Final Answer:</span>`;
|
|
3093
|
+
content += `<div class="answer-text">${this.escapeHtml(msg.content.answer)}</div>`;
|
|
3094
|
+
content += `</div>`;
|
|
3095
|
+
}
|
|
3096
|
+
content += `</div>`;
|
|
3097
|
+
} else {
|
|
3098
|
+
content = `<pre class="json-content">${JSON.stringify(msg.content, null, 2)}</pre>`;
|
|
3099
|
+
}
|
|
3100
|
+
} else {
|
|
3101
|
+
content = `<div class="message-text">${this.escapeHtml(msg.content)}</div>`;
|
|
3102
|
+
}
|
|
3103
|
+
} else if (msg.role === 'system') {
|
|
3104
|
+
messageClass = 'system-message';
|
|
3105
|
+
roleLabel = 'System';
|
|
3106
|
+
if (msg.content?.type === 'stats') {
|
|
3107
|
+
const stats = msg.content.performance_stats;
|
|
3108
|
+
content = `
|
|
3109
|
+
<div class="stats-badge">
|
|
3110
|
+
<div class="stats-header">📊 Performance Metrics (Step ${msg.content.step})</div>
|
|
3111
|
+
<div class="stats-grid">
|
|
3112
|
+
<div class="stat-item">
|
|
3113
|
+
<span class="stat-label">Input</span>
|
|
3114
|
+
<span class="stat-value">${stats.input_tokens.toLocaleString()}</span>
|
|
3115
|
+
</div>
|
|
3116
|
+
<div class="stat-item">
|
|
3117
|
+
<span class="stat-label">Output</span>
|
|
3118
|
+
<span class="stat-value">${stats.output_tokens.toLocaleString()}</span>
|
|
3119
|
+
</div>
|
|
3120
|
+
<div class="stat-item">
|
|
3121
|
+
<span class="stat-label">TTFT</span>
|
|
3122
|
+
<span class="stat-value">${stats.time_to_first_token.toFixed(2)}s</span>
|
|
3123
|
+
</div>
|
|
3124
|
+
<div class="stat-item">
|
|
3125
|
+
<span class="stat-label">Speed</span>
|
|
3126
|
+
<span class="stat-value">${stats.tokens_per_second.toFixed(0)} t/s</span>
|
|
3127
|
+
</div>
|
|
3128
|
+
</div>
|
|
3129
|
+
</div>
|
|
3130
|
+
`;
|
|
3131
|
+
} else if (msg.content?.issues) {
|
|
3132
|
+
const issues = msg.content.issues || [];
|
|
3133
|
+
content = `
|
|
3134
|
+
<div class="tool-result">
|
|
3135
|
+
<div class="result-header">✅ Jira Search Results (${msg.content.total} found)</div>
|
|
3136
|
+
${issues.length > 0 ? `
|
|
3137
|
+
<div class="issues-list">
|
|
3138
|
+
${issues.map(issue => `
|
|
3139
|
+
<div class="issue-item">
|
|
3140
|
+
<span class="issue-key">${issue.key}</span>
|
|
3141
|
+
<span class="issue-summary">${this.escapeHtml(issue.summary)}</span>
|
|
3142
|
+
<span class="issue-status ${issue.status.toLowerCase().replace(' ', '-')}">${issue.status}</span>
|
|
3143
|
+
</div>
|
|
3144
|
+
`).join('')}
|
|
3145
|
+
</div>
|
|
3146
|
+
` : '<div class="no-results">No issues found</div>'}
|
|
3147
|
+
</div>
|
|
3148
|
+
`;
|
|
3149
|
+
} else if (msg.content?.status === 'success') {
|
|
3150
|
+
content = `
|
|
3151
|
+
<div class="tool-result success">
|
|
3152
|
+
<div class="result-header">✅ Tool Execution Success</div>
|
|
3153
|
+
<pre class="result-data">${JSON.stringify(msg.content, null, 2)}</pre>
|
|
3154
|
+
</div>
|
|
3155
|
+
`;
|
|
3156
|
+
} else {
|
|
3157
|
+
content = `<pre class="json-content">${JSON.stringify(msg.content, null, 2)}</pre>`;
|
|
3158
|
+
}
|
|
3159
|
+
}
|
|
3160
|
+
|
|
3161
|
+
flowHtml += `
|
|
3162
|
+
<div class="conversation-message ${messageClass}" data-index="${index}">
|
|
3163
|
+
<div class="message-header">
|
|
3164
|
+
<span class="message-role">${roleLabel}</span>
|
|
3165
|
+
<span class="message-number">#${index + 1}</span>
|
|
3166
|
+
</div>
|
|
3167
|
+
<div class="message-body">
|
|
3168
|
+
${content}
|
|
3169
|
+
</div>
|
|
3170
|
+
</div>
|
|
3171
|
+
`;
|
|
3172
|
+
});
|
|
3173
|
+
|
|
3174
|
+
return `
|
|
3175
|
+
<div class="section">
|
|
3176
|
+
<h4>💬 Conversation Flow</h4>
|
|
3177
|
+
<div class="conversation-flow">
|
|
3178
|
+
${flowHtml}
|
|
3179
|
+
</div>
|
|
3180
|
+
</div>
|
|
3181
|
+
`;
|
|
3182
|
+
}
|
|
3183
|
+
|
|
3184
|
+
escapeHtml(text) {
|
|
3185
|
+
if (!text) return '';
|
|
3186
|
+
const div = document.createElement('div');
|
|
3187
|
+
div.textContent = text;
|
|
3188
|
+
return div.innerHTML;
|
|
3189
|
+
}
|
|
3190
|
+
|
|
3191
|
+
generatePerformanceMetricsSection(performanceStats, totalInputTokens, totalOutputTokens, avgTokensPerSecond, avgTimeToFirstToken) {
|
|
3192
|
+
if (performanceStats.length === 0) {
|
|
3193
|
+
return `
|
|
3194
|
+
<div class="section">
|
|
3195
|
+
<h4>⚡ Performance Metrics</h4>
|
|
3196
|
+
<p style="color: #6c757d; font-style: italic;">No performance statistics available</p>
|
|
3197
|
+
</div>
|
|
3198
|
+
`;
|
|
3199
|
+
}
|
|
3200
|
+
|
|
3201
|
+
// Calculate min, max, and averages
|
|
3202
|
+
const inputTokensList = performanceStats.map(s => s.input_tokens);
|
|
3203
|
+
const outputTokensList = performanceStats.map(s => s.output_tokens);
|
|
3204
|
+
const ttftList = performanceStats.map(s => s.time_to_first_token);
|
|
3205
|
+
const speedList = performanceStats.map(s => s.tokens_per_second);
|
|
3206
|
+
|
|
3207
|
+
const stats = {
|
|
3208
|
+
input: {
|
|
3209
|
+
min: Math.min(...inputTokensList).toLocaleString(),
|
|
3210
|
+
max: Math.max(...inputTokensList).toLocaleString(),
|
|
3211
|
+
avg: Math.round(totalInputTokens / performanceStats.length).toLocaleString()
|
|
3212
|
+
},
|
|
3213
|
+
output: {
|
|
3214
|
+
min: Math.min(...outputTokensList).toLocaleString(),
|
|
3215
|
+
max: Math.max(...outputTokensList).toLocaleString(),
|
|
3216
|
+
avg: Math.round(totalOutputTokens / performanceStats.length).toLocaleString()
|
|
3217
|
+
},
|
|
3218
|
+
ttft: {
|
|
3219
|
+
min: Math.min(...ttftList).toFixed(3),
|
|
3220
|
+
max: Math.max(...ttftList).toFixed(3),
|
|
3221
|
+
avg: avgTimeToFirstToken.toFixed(3)
|
|
3222
|
+
},
|
|
3223
|
+
speed: {
|
|
3224
|
+
min: Math.min(...speedList).toFixed(1),
|
|
3225
|
+
max: Math.max(...speedList).toFixed(1),
|
|
3226
|
+
avg: avgTokensPerSecond.toFixed(1)
|
|
3227
|
+
}
|
|
3228
|
+
};
|
|
3229
|
+
|
|
3230
|
+
const totalTokens = totalInputTokens + totalOutputTokens;
|
|
3231
|
+
const inputPercentage = totalTokens > 0 ? (totalInputTokens / totalTokens * 100).toFixed(1) : 0;
|
|
3232
|
+
const outputPercentage = totalTokens > 0 ? (totalOutputTokens / totalTokens * 100).toFixed(1) : 0;
|
|
3233
|
+
|
|
3234
|
+
let stepsTableHtml = '';
|
|
3235
|
+
performanceStats.forEach((stats, index) => {
|
|
3236
|
+
stepsTableHtml += `
|
|
3237
|
+
<tr>
|
|
3238
|
+
<td class="step-number">${index + 1}</td>
|
|
3239
|
+
<td class="tokens-in">${stats.input_tokens.toLocaleString()}</td>
|
|
3240
|
+
<td class="tokens-out">${stats.output_tokens.toLocaleString()}</td>
|
|
3241
|
+
<td class="ttft">${stats.time_to_first_token.toFixed(2)}s</td>
|
|
3242
|
+
<td class="speed">${stats.tokens_per_second.toFixed(0)} t/s</td>
|
|
3243
|
+
</tr>
|
|
3244
|
+
`;
|
|
3245
|
+
});
|
|
3246
|
+
|
|
3247
|
+
return `
|
|
3248
|
+
<div class="section">
|
|
3249
|
+
<h4>⚡ Performance Metrics</h4>
|
|
3250
|
+
<div class="performance-summary">
|
|
3251
|
+
<div class="token-overview">
|
|
3252
|
+
<h5>Token Summary</h5>
|
|
3253
|
+
<div class="metrics-grid">
|
|
3254
|
+
<div class="metric">
|
|
3255
|
+
<span class="metric-label">Total Tokens</span>
|
|
3256
|
+
<span class="metric-value">${totalTokens.toLocaleString()}</span>
|
|
3257
|
+
</div>
|
|
3258
|
+
<div class="metric">
|
|
3259
|
+
<span class="metric-label">Total Input</span>
|
|
3260
|
+
<span class="metric-value">${totalInputTokens.toLocaleString()} (${inputPercentage}%)</span>
|
|
3261
|
+
</div>
|
|
3262
|
+
<div class="metric">
|
|
3263
|
+
<span class="metric-label">Total Output</span>
|
|
3264
|
+
<span class="metric-value">${totalOutputTokens.toLocaleString()} (${outputPercentage}%)</span>
|
|
3265
|
+
</div>
|
|
3266
|
+
</div>
|
|
3267
|
+
</div>
|
|
3268
|
+
|
|
3269
|
+
<div class="detailed-stats">
|
|
3270
|
+
<h5>Detailed Statistics (Min / Avg / Max)</h5>
|
|
3271
|
+
<table class="stats-summary-table">
|
|
3272
|
+
<thead>
|
|
3273
|
+
<tr>
|
|
3274
|
+
<th>Metric</th>
|
|
3275
|
+
<th>Min</th>
|
|
3276
|
+
<th>Average</th>
|
|
3277
|
+
<th>Max</th>
|
|
3278
|
+
</tr>
|
|
3279
|
+
</thead>
|
|
3280
|
+
<tbody>
|
|
3281
|
+
<tr class="metric-tokens">
|
|
3282
|
+
<td><strong>Input Tokens</strong></td>
|
|
3283
|
+
<td class="stat-min">${stats.input.min}</td>
|
|
3284
|
+
<td class="stat-avg">${stats.input.avg}</td>
|
|
3285
|
+
<td class="stat-max">${stats.input.max}</td>
|
|
3286
|
+
</tr>
|
|
3287
|
+
<tr class="metric-tokens">
|
|
3288
|
+
<td><strong>Output Tokens</strong></td>
|
|
3289
|
+
<td class="stat-min">${stats.output.min}</td>
|
|
3290
|
+
<td class="stat-avg">${stats.output.avg}</td>
|
|
3291
|
+
<td class="stat-max">${stats.output.max}</td>
|
|
3292
|
+
</tr>
|
|
3293
|
+
<tr class="metric-ttft">
|
|
3294
|
+
<td><strong>Time to First Token</strong></td>
|
|
3295
|
+
<td class="stat-min">${stats.ttft.min}s</td>
|
|
3296
|
+
<td class="stat-avg">${stats.ttft.avg}s</td>
|
|
3297
|
+
<td class="stat-max">${stats.ttft.max}s</td>
|
|
3298
|
+
</tr>
|
|
3299
|
+
<tr class="metric-speed">
|
|
3300
|
+
<td><strong>Tokens/Second</strong></td>
|
|
3301
|
+
<td class="stat-min">${stats.speed.min} t/s</td>
|
|
3302
|
+
<td class="stat-avg">${stats.speed.avg} t/s</td>
|
|
3303
|
+
<td class="stat-max">${stats.speed.max} t/s</td>
|
|
3304
|
+
</tr>
|
|
3305
|
+
</tbody>
|
|
3306
|
+
</table>
|
|
3307
|
+
</div>
|
|
3308
|
+
</div>
|
|
3309
|
+
<div class="steps-table-container">
|
|
3310
|
+
<h5>Step-by-Step Breakdown</h5>
|
|
3311
|
+
<table class="steps-table">
|
|
3312
|
+
<thead>
|
|
3313
|
+
<tr>
|
|
3314
|
+
<th>Step</th>
|
|
3315
|
+
<th>Input</th>
|
|
3316
|
+
<th>Output</th>
|
|
3317
|
+
<th>TTFT</th>
|
|
3318
|
+
<th>Speed</th>
|
|
3319
|
+
</tr>
|
|
3320
|
+
</thead>
|
|
3321
|
+
<tbody>
|
|
3322
|
+
${stepsTableHtml}
|
|
3323
|
+
</tbody>
|
|
3324
|
+
</table>
|
|
3325
|
+
</div>
|
|
3326
|
+
</div>
|
|
3327
|
+
</div>
|
|
3328
|
+
`;
|
|
3329
|
+
}
|
|
3330
|
+
|
|
3331
|
+
generateToolExecutionSection(toolCalls) {
|
|
3332
|
+
if (toolCalls.length === 0) {
|
|
3333
|
+
return `
|
|
3334
|
+
<div class="section">
|
|
3335
|
+
<h4>🔧 Tool Executions</h4>
|
|
3336
|
+
<p style="color: #6c757d; font-style: italic;">No tool calls were made during this conversation.</p>
|
|
3337
|
+
</div>
|
|
3338
|
+
`;
|
|
3339
|
+
}
|
|
3340
|
+
|
|
3341
|
+
let toolsHtml = '';
|
|
3342
|
+
toolCalls.forEach((toolCall, index) => {
|
|
3343
|
+
toolsHtml += `
|
|
3344
|
+
<div class="tool-call">
|
|
3345
|
+
<div class="tool-header">
|
|
3346
|
+
<span class="tool-name">🔧 ${toolCall.tool}</span>
|
|
3347
|
+
<span class="tool-index">#${index + 1}</span>
|
|
3348
|
+
</div>
|
|
3349
|
+
<div class="tool-details">
|
|
3350
|
+
<div class="tool-thought"><strong>Thought:</strong> ${toolCall.thought}</div>
|
|
3351
|
+
<div class="tool-goal"><strong>Goal:</strong> ${toolCall.goal}</div>
|
|
3352
|
+
<div class="tool-args"><strong>Arguments:</strong> <code>${JSON.stringify(toolCall.args, null, 2)}</code></div>
|
|
3353
|
+
</div>
|
|
3354
|
+
</div>
|
|
3355
|
+
`;
|
|
3356
|
+
});
|
|
3357
|
+
|
|
3358
|
+
return `
|
|
3359
|
+
<div class="section">
|
|
3360
|
+
<h4>🔧 Tool Executions (${toolCalls.length})</h4>
|
|
3361
|
+
<div class="tool-executions">
|
|
3362
|
+
${toolsHtml}
|
|
3363
|
+
</div>
|
|
3364
|
+
</div>
|
|
3365
|
+
`;
|
|
3366
|
+
}
|
|
3367
|
+
|
|
3368
|
+
generateSystemPromptSection(systemPrompt, systemPromptTokens) {
|
|
3369
|
+
if (!systemPrompt) {
|
|
3370
|
+
return '';
|
|
3371
|
+
}
|
|
3372
|
+
|
|
3373
|
+
// Estimate token count if not provided (rough approximation: ~4 chars per token)
|
|
3374
|
+
const estimatedTokens = Math.round(systemPrompt.length / 4);
|
|
3375
|
+
const tokenCount = systemPromptTokens || estimatedTokens;
|
|
3376
|
+
|
|
3377
|
+
// Note about token counting for local models
|
|
3378
|
+
const tokenNote = systemPromptTokens ? '' :
|
|
3379
|
+
'<div class="token-note">Note: System prompt tokens are included in the total input but may not be reflected in per-step metrics for local models.</div>';
|
|
3380
|
+
|
|
3381
|
+
return `
|
|
3382
|
+
<div class="section">
|
|
3383
|
+
<h4>📋 System Prompt</h4>
|
|
3384
|
+
<div class="system-prompt-info">
|
|
3385
|
+
<span class="prompt-tokens">Estimated Token Count: ~${tokenCount.toLocaleString()}</span>
|
|
3386
|
+
<span class="prompt-chars">(${systemPrompt.length.toLocaleString()} characters)</span>
|
|
3387
|
+
</div>
|
|
3388
|
+
${tokenNote}
|
|
3389
|
+
<pre class="system-prompt">${this.escapeHtml(systemPrompt)}</pre>
|
|
3390
|
+
</div>
|
|
3391
|
+
`;
|
|
3392
|
+
}
|
|
3393
|
+
}
|
|
3394
|
+
|
|
3395
|
+
// Start the visualizer once the document has been parsed.
document.addEventListener('DOMContentLoaded', function onDomReady() {
    console.log('DOM Content Loaded, initializing EvaluationVisualizer');
    try {
        new EvaluationVisualizer();
    } catch (initError) {
        // A failing constructor is reported on the console instead of being rethrown.
        console.error('Error initializing EvaluationVisualizer:', initError);
    }
});
|