amd_gaia-0.14.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amd_gaia-0.14.1.dist-info/METADATA +768 -0
- amd_gaia-0.14.1.dist-info/RECORD +800 -0
- amd_gaia-0.14.1.dist-info/WHEEL +5 -0
- amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
- amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
- amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
- gaia/__init__.py +2 -0
- gaia/agents/__init__.py +19 -0
- gaia/agents/base/__init__.py +9 -0
- gaia/agents/base/agent.py +2072 -0
- gaia/agents/base/api_agent.py +120 -0
- gaia/agents/base/console.py +1457 -0
- gaia/agents/base/mcp_agent.py +86 -0
- gaia/agents/base/tools.py +83 -0
- gaia/agents/blender/agent.py +556 -0
- gaia/agents/blender/agent_simple.py +135 -0
- gaia/agents/blender/app.py +211 -0
- gaia/agents/blender/app_simple.py +41 -0
- gaia/agents/blender/core/__init__.py +16 -0
- gaia/agents/blender/core/materials.py +506 -0
- gaia/agents/blender/core/objects.py +316 -0
- gaia/agents/blender/core/rendering.py +225 -0
- gaia/agents/blender/core/scene.py +220 -0
- gaia/agents/blender/core/view.py +146 -0
- gaia/agents/chat/__init__.py +9 -0
- gaia/agents/chat/agent.py +975 -0
- gaia/agents/chat/app.py +1058 -0
- gaia/agents/chat/session.py +508 -0
- gaia/agents/chat/tools/__init__.py +15 -0
- gaia/agents/chat/tools/file_tools.py +96 -0
- gaia/agents/chat/tools/rag_tools.py +1729 -0
- gaia/agents/chat/tools/shell_tools.py +436 -0
- gaia/agents/code/__init__.py +7 -0
- gaia/agents/code/agent.py +547 -0
- gaia/agents/code/app.py +266 -0
- gaia/agents/code/models.py +135 -0
- gaia/agents/code/orchestration/__init__.py +24 -0
- gaia/agents/code/orchestration/checklist_executor.py +1739 -0
- gaia/agents/code/orchestration/checklist_generator.py +709 -0
- gaia/agents/code/orchestration/factories/__init__.py +9 -0
- gaia/agents/code/orchestration/factories/base.py +63 -0
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
- gaia/agents/code/orchestration/factories/python_factory.py +106 -0
- gaia/agents/code/orchestration/orchestrator.py +610 -0
- gaia/agents/code/orchestration/project_analyzer.py +391 -0
- gaia/agents/code/orchestration/steps/__init__.py +67 -0
- gaia/agents/code/orchestration/steps/base.py +188 -0
- gaia/agents/code/orchestration/steps/error_handler.py +314 -0
- gaia/agents/code/orchestration/steps/nextjs.py +828 -0
- gaia/agents/code/orchestration/steps/python.py +307 -0
- gaia/agents/code/orchestration/template_catalog.py +463 -0
- gaia/agents/code/orchestration/workflows/__init__.py +14 -0
- gaia/agents/code/orchestration/workflows/base.py +80 -0
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
- gaia/agents/code/orchestration/workflows/python.py +94 -0
- gaia/agents/code/prompts/__init__.py +11 -0
- gaia/agents/code/prompts/base_prompt.py +77 -0
- gaia/agents/code/prompts/code_patterns.py +1925 -0
- gaia/agents/code/prompts/nextjs_prompt.py +40 -0
- gaia/agents/code/prompts/python_prompt.py +109 -0
- gaia/agents/code/schema_inference.py +365 -0
- gaia/agents/code/system_prompt.py +41 -0
- gaia/agents/code/tools/__init__.py +42 -0
- gaia/agents/code/tools/cli_tools.py +1138 -0
- gaia/agents/code/tools/code_formatting.py +319 -0
- gaia/agents/code/tools/code_tools.py +769 -0
- gaia/agents/code/tools/error_fixing.py +1347 -0
- gaia/agents/code/tools/external_tools.py +180 -0
- gaia/agents/code/tools/file_io.py +845 -0
- gaia/agents/code/tools/prisma_tools.py +190 -0
- gaia/agents/code/tools/project_management.py +1016 -0
- gaia/agents/code/tools/testing.py +321 -0
- gaia/agents/code/tools/typescript_tools.py +122 -0
- gaia/agents/code/tools/validation_parsing.py +461 -0
- gaia/agents/code/tools/validation_tools.py +803 -0
- gaia/agents/code/tools/web_dev_tools.py +1744 -0
- gaia/agents/code/validators/__init__.py +16 -0
- gaia/agents/code/validators/antipattern_checker.py +241 -0
- gaia/agents/code/validators/ast_analyzer.py +197 -0
- gaia/agents/code/validators/requirements_validator.py +145 -0
- gaia/agents/code/validators/syntax_validator.py +171 -0
- gaia/agents/docker/__init__.py +7 -0
- gaia/agents/docker/agent.py +642 -0
- gaia/agents/jira/__init__.py +11 -0
- gaia/agents/jira/agent.py +894 -0
- gaia/agents/jira/jql_templates.py +299 -0
- gaia/agents/routing/__init__.py +7 -0
- gaia/agents/routing/agent.py +512 -0
- gaia/agents/routing/system_prompt.py +75 -0
- gaia/api/__init__.py +23 -0
- gaia/api/agent_registry.py +238 -0
- gaia/api/app.py +305 -0
- gaia/api/openai_server.py +575 -0
- gaia/api/schemas.py +186 -0
- gaia/api/sse_handler.py +370 -0
- gaia/apps/__init__.py +4 -0
- gaia/apps/llm/__init__.py +6 -0
- gaia/apps/llm/app.py +169 -0
- gaia/apps/summarize/app.py +633 -0
- gaia/apps/summarize/html_viewer.py +133 -0
- gaia/apps/summarize/pdf_formatter.py +284 -0
- gaia/audio/__init__.py +2 -0
- gaia/audio/audio_client.py +439 -0
- gaia/audio/audio_recorder.py +269 -0
- gaia/audio/kokoro_tts.py +599 -0
- gaia/audio/whisper_asr.py +432 -0
- gaia/chat/__init__.py +16 -0
- gaia/chat/app.py +430 -0
- gaia/chat/prompts.py +522 -0
- gaia/chat/sdk.py +1200 -0
- gaia/cli.py +5621 -0
- gaia/eval/batch_experiment.py +2332 -0
- gaia/eval/claude.py +542 -0
- gaia/eval/config.py +37 -0
- gaia/eval/email_generator.py +512 -0
- gaia/eval/eval.py +3179 -0
- gaia/eval/groundtruth.py +1130 -0
- gaia/eval/transcript_generator.py +582 -0
- gaia/eval/webapp/README.md +168 -0
- gaia/eval/webapp/node_modules/.bin/mime +16 -0
- gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
- gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
- gaia/eval/webapp/node_modules/.package-lock.json +865 -0
- gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
- gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
- gaia/eval/webapp/node_modules/accepts/README.md +140 -0
- gaia/eval/webapp/node_modules/accepts/index.js +238 -0
- gaia/eval/webapp/node_modules/accepts/package.json +47 -0
- gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
- gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
- gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
- gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
- gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
- gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
- gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
- gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
- gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
- gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
- gaia/eval/webapp/node_modules/bytes/History.md +97 -0
- gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
- gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
- gaia/eval/webapp/node_modules/bytes/index.js +170 -0
- gaia/eval/webapp/node_modules/bytes/package.json +42 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
- gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
- gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
- gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
- gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
- gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
- gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
- gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
- gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
- gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
- gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
- gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
- gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
- gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
- gaia/eval/webapp/node_modules/content-type/README.md +94 -0
- gaia/eval/webapp/node_modules/content-type/index.js +225 -0
- gaia/eval/webapp/node_modules/content-type/package.json +42 -0
- gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
- gaia/eval/webapp/node_modules/cookie/README.md +317 -0
- gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/cookie/index.js +334 -0
- gaia/eval/webapp/node_modules/cookie/package.json +44 -0
- gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
- gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
- gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
- gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
- gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
- gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
- gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
- gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
- gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
- gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
- gaia/eval/webapp/node_modules/debug/Makefile +50 -0
- gaia/eval/webapp/node_modules/debug/README.md +312 -0
- gaia/eval/webapp/node_modules/debug/component.json +19 -0
- gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
- gaia/eval/webapp/node_modules/debug/node.js +1 -0
- gaia/eval/webapp/node_modules/debug/package.json +49 -0
- gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
- gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
- gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
- gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
- gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
- gaia/eval/webapp/node_modules/depd/History.md +103 -0
- gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
- gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
- gaia/eval/webapp/node_modules/depd/index.js +538 -0
- gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
- gaia/eval/webapp/node_modules/depd/package.json +45 -0
- gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
- gaia/eval/webapp/node_modules/destroy/README.md +63 -0
- gaia/eval/webapp/node_modules/destroy/index.js +209 -0
- gaia/eval/webapp/node_modules/destroy/package.json +48 -0
- gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
- gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
- gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
- gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
- gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
- gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
- gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
- gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
- gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
- gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
- gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
- gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
- gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
- gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
- gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
- gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
- gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
- gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
- gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
- gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
- gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
- gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
- gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
- gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
- gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
- gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
- gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
- gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
- gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
- gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
- gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
- gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
- gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
- gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
- gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
- gaia/eval/webapp/node_modules/etag/README.md +159 -0
- gaia/eval/webapp/node_modules/etag/index.js +131 -0
- gaia/eval/webapp/node_modules/etag/package.json +47 -0
- gaia/eval/webapp/node_modules/express/History.md +3656 -0
- gaia/eval/webapp/node_modules/express/LICENSE +24 -0
- gaia/eval/webapp/node_modules/express/Readme.md +260 -0
- gaia/eval/webapp/node_modules/express/index.js +11 -0
- gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
- gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
- gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
- gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
- gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
- gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
- gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
- gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
- gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
- gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
- gaia/eval/webapp/node_modules/express/package.json +102 -0
- gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
- gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
- gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
- gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
- gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
- gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
- gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
- gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
- gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
- gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
- gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
- gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
- gaia/eval/webapp/node_modules/fresh/README.md +119 -0
- gaia/eval/webapp/node_modules/fresh/index.js +137 -0
- gaia/eval/webapp/node_modules/fresh/package.json +46 -0
- gaia/eval/webapp/node_modules/fs/README.md +9 -0
- gaia/eval/webapp/node_modules/fs/package.json +20 -0
- gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
- gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
- gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
- gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
- gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
- gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
- gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
- gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
- gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
- gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
- gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
- gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
- gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
- gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
- gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
- gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
- gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
- gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
- gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
- gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
- gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
- gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
- gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
- gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
- gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
- gaia/eval/webapp/node_modules/gopd/README.md +40 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
- gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
- gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
- gaia/eval/webapp/node_modules/gopd/index.js +15 -0
- gaia/eval/webapp/node_modules/gopd/package.json +77 -0
- gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
- gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
- gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
- gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
- gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
- gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
- gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
- gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
- gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
- gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
- gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
- gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
- gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
- gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
- gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
- gaia/eval/webapp/node_modules/hasown/README.md +40 -0
- gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
- gaia/eval/webapp/node_modules/hasown/index.js +8 -0
- gaia/eval/webapp/node_modules/hasown/package.json +92 -0
- gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
- gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
- gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
- gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
- gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
- gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
- gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
- gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
- gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
- gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
- gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
- gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
- gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
- gaia/eval/webapp/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
- gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
- gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
- gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
- gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
- gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
- gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
- gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
- gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
- gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
- gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
- gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
- gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
- gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
- gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
- gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
- gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
- gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
- gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
- gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
- gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
- gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
- gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
- gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
- gaia/eval/webapp/node_modules/methods/README.md +51 -0
- gaia/eval/webapp/node_modules/methods/index.js +69 -0
- gaia/eval/webapp/node_modules/methods/package.json +36 -0
- gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
- gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
- gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
- gaia/eval/webapp/node_modules/mime/README.md +90 -0
- gaia/eval/webapp/node_modules/mime/cli.js +8 -0
- gaia/eval/webapp/node_modules/mime/mime.js +108 -0
- gaia/eval/webapp/node_modules/mime/package.json +44 -0
- gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
- gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
- gaia/eval/webapp/node_modules/mime/types.json +1 -0
- gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
- gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
- gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
- gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
- gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
- gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
- gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
- gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
- gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
- gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
- gaia/eval/webapp/node_modules/ms/index.js +152 -0
- gaia/eval/webapp/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/ms/package.json +37 -0
- gaia/eval/webapp/node_modules/ms/readme.md +51 -0
- gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
- gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
- gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
- gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
- gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
- gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
- gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
- gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
- gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
- gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
- gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
- gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
- gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
- gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
- gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
- gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
- gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
- gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
- gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
- gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
- gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
- gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
- gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
- gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
- gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
- gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
- gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
- gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
- gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
- gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
- gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
- gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
- gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
- gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
- gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
- gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
- gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
- gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
- gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
- gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
- gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
- gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
- gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
- gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
- gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
- gaia/eval/webapp/node_modules/path/.npmignore +1 -0
- gaia/eval/webapp/node_modules/path/LICENSE +18 -0
- gaia/eval/webapp/node_modules/path/README.md +15 -0
- gaia/eval/webapp/node_modules/path/package.json +24 -0
- gaia/eval/webapp/node_modules/path/path.js +628 -0
- gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
- gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
- gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
- gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
- gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
- gaia/eval/webapp/node_modules/process/LICENSE +22 -0
- gaia/eval/webapp/node_modules/process/README.md +26 -0
- gaia/eval/webapp/node_modules/process/browser.js +184 -0
- gaia/eval/webapp/node_modules/process/index.js +2 -0
- gaia/eval/webapp/node_modules/process/package.json +27 -0
- gaia/eval/webapp/node_modules/process/test.js +199 -0
- gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
- gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
- gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
- gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
- gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
- gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
- gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
- gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
- gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
- gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
- gaia/eval/webapp/node_modules/qs/README.md +709 -0
- gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
- gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
- gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
- gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
- gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
- gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
- gaia/eval/webapp/node_modules/qs/package.json +91 -0
- gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
- gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
- gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
- gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
- gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
- gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
- gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
- gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
- gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
- gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
- gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
- gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
- gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
- gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
- gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
- gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
- gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
- gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
- gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
- gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
- gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
- gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
- gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
- gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
- gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
- gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
- gaia/eval/webapp/node_modules/send/LICENSE +23 -0
- gaia/eval/webapp/node_modules/send/README.md +327 -0
- gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
- gaia/eval/webapp/node_modules/send/index.js +1142 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
- gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
- gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
- gaia/eval/webapp/node_modules/send/package.json +62 -0
- gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
- gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
- gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
- gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
- gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
- gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
- gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
- gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
- gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
- gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
- gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
- gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
- gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
- gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
- gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
- gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
- gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
- gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
- gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
- gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
- gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
- gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
- gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
- gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
- gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
- gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
- gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
- gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
- gaia/eval/webapp/node_modules/statuses/README.md +136 -0
- gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
- gaia/eval/webapp/node_modules/statuses/index.js +146 -0
- gaia/eval/webapp/node_modules/statuses/package.json +49 -0
- gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
- gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
- gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
- gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
- gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
- gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
- gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
- gaia/eval/webapp/node_modules/type-is/README.md +170 -0
- gaia/eval/webapp/node_modules/type-is/index.js +266 -0
- gaia/eval/webapp/node_modules/type-is/package.json +45 -0
- gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
- gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
- gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
- gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
- gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
- gaia/eval/webapp/node_modules/util/LICENSE +18 -0
- gaia/eval/webapp/node_modules/util/README.md +15 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
- gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
- gaia/eval/webapp/node_modules/util/package.json +35 -0
- gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
- gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
- gaia/eval/webapp/node_modules/util/util.js +586 -0
- gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
- gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
- gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
- gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
- gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
- gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
- gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
- gaia/eval/webapp/node_modules/vary/README.md +101 -0
- gaia/eval/webapp/node_modules/vary/index.js +149 -0
- gaia/eval/webapp/node_modules/vary/package.json +43 -0
- gaia/eval/webapp/package-lock.json +875 -0
- gaia/eval/webapp/package.json +21 -0
- gaia/eval/webapp/public/app.js +3403 -0
- gaia/eval/webapp/public/index.html +88 -0
- gaia/eval/webapp/public/styles.css +3661 -0
- gaia/eval/webapp/server.js +416 -0
- gaia/eval/webapp/test-setup.js +73 -0
- gaia/llm/__init__.py +2 -0
- gaia/llm/lemonade_client.py +3083 -0
- gaia/llm/lemonade_manager.py +269 -0
- gaia/llm/llm_client.py +729 -0
- gaia/llm/vlm_client.py +307 -0
- gaia/logger.py +189 -0
- gaia/mcp/agent_mcp_server.py +245 -0
- gaia/mcp/blender_mcp_client.py +138 -0
- gaia/mcp/blender_mcp_server.py +648 -0
- gaia/mcp/context7_cache.py +332 -0
- gaia/mcp/external_services.py +518 -0
- gaia/mcp/mcp_bridge.py +550 -0
- gaia/mcp/servers/__init__.py +6 -0
- gaia/mcp/servers/docker_mcp.py +83 -0
- gaia/rag/__init__.py +10 -0
- gaia/rag/app.py +293 -0
- gaia/rag/demo.py +304 -0
- gaia/rag/pdf_utils.py +235 -0
- gaia/rag/sdk.py +2194 -0
- gaia/security.py +163 -0
- gaia/talk/app.py +289 -0
- gaia/talk/sdk.py +538 -0
- gaia/util.py +46 -0
- gaia/version.py +100 -0
|
@@ -0,0 +1,1729 @@
|
|
|
1
|
+
# Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
"""
|
|
4
|
+
RAG Tools Mixin for Chat Agent.
|
|
5
|
+
|
|
6
|
+
Provides document retrieval, querying, and evaluation tools.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_page_from_chunk(chunk_text, chunk_index=-1, all_chunks=None):
|
|
19
|
+
"""
|
|
20
|
+
Extract page number from chunk text or by looking at nearby chunks.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
chunk_text: The chunk text to extract page from
|
|
24
|
+
chunk_index: Global index of this chunk (for looking backwards)
|
|
25
|
+
all_chunks: List of all chunks (for looking backwards)
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
Page number as int, or None if not found
|
|
29
|
+
"""
|
|
30
|
+
# Strategy 1: Try [Page X] format in this chunk
|
|
31
|
+
match = re.search(r"\[Page (\d+)\]", chunk_text)
|
|
32
|
+
if match:
|
|
33
|
+
return int(match.group(1))
|
|
34
|
+
|
|
35
|
+
# Strategy 2: Try (Page X) format
|
|
36
|
+
match = re.search(r"\(Page (\d+)\)", chunk_text)
|
|
37
|
+
if match:
|
|
38
|
+
return int(match.group(1))
|
|
39
|
+
|
|
40
|
+
# Strategy 3: Look backwards in previous chunks to find most recent page marker
|
|
41
|
+
if chunk_index >= 0 and all_chunks:
|
|
42
|
+
for prev_idx in range(chunk_index - 1, max(-1, chunk_index - 5), -1):
|
|
43
|
+
if prev_idx < len(all_chunks):
|
|
44
|
+
prev_chunk = all_chunks[prev_idx]
|
|
45
|
+
match = re.search(r"\[Page (\d+)\]", prev_chunk)
|
|
46
|
+
if match:
|
|
47
|
+
return int(match.group(1))
|
|
48
|
+
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class RAGToolsMixin:
|
|
53
|
+
"""
|
|
54
|
+
Mixin providing RAG and document query tools.
|
|
55
|
+
|
|
56
|
+
Tools provided:
|
|
57
|
+
- query_documents: Semantic search across all indexed documents
|
|
58
|
+
- query_specific_file: Semantic search in one specific file
|
|
59
|
+
- search_indexed_chunks: Exact text search in RAG indexed chunks (in-memory)
|
|
60
|
+
- evaluate_retrieval: Evaluate if retrieved information is sufficient
|
|
61
|
+
- index_document: Add document to RAG index
|
|
62
|
+
- index_directory: Index all files in a directory
|
|
63
|
+
- list_indexed_documents: List currently indexed documents
|
|
64
|
+
- summarize_document: Generate document summaries
|
|
65
|
+
- rag_status: Get RAG system status
|
|
66
|
+
|
|
67
|
+
Note: File system search tools (search_file, search_directory, search_file_content)
|
|
68
|
+
are provided by FileSearchToolsMixin from gaia.agents.tools.file_tools
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
def register_rag_tools(self) -> None:
|
|
72
|
+
"""Register RAG-related tools."""
|
|
73
|
+
from gaia.agents.base.tools import tool
|
|
74
|
+
|
|
75
|
+
@tool(
|
|
76
|
+
name="query_documents",
|
|
77
|
+
description="Query indexed documents using RAG to find relevant information. Returns document chunks that the agent should use to answer the user's question.",
|
|
78
|
+
parameters={
|
|
79
|
+
"query": {
|
|
80
|
+
"type": "str",
|
|
81
|
+
"description": "The question or query to search for in documents",
|
|
82
|
+
"required": True,
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
)
|
|
86
|
+
def query_documents(
|
|
87
|
+
query: str, debug: bool = False # pylint: disable=unused-argument
|
|
88
|
+
) -> Dict[str, Any]:
|
|
89
|
+
"""
|
|
90
|
+
Query indexed documents with improved search key generation.
|
|
91
|
+
|
|
92
|
+
Returns chunks for the agent to use in formulating an answer,
|
|
93
|
+
rather than generating the answer directly. This maintains proper
|
|
94
|
+
integration with the agent's conversation flow.
|
|
95
|
+
"""
|
|
96
|
+
try:
|
|
97
|
+
# Check if RAG is initialized and has documents
|
|
98
|
+
if not self.rag or not self.rag.index or len(self.rag.chunks) == 0:
|
|
99
|
+
return {
|
|
100
|
+
"status": "no_documents",
|
|
101
|
+
"message": "No documents are indexed. Answer the user's question using your general knowledge.",
|
|
102
|
+
"instruction": (
|
|
103
|
+
"There are no documents indexed to search. "
|
|
104
|
+
"Please answer the user's question using your general knowledge instead. "
|
|
105
|
+
"Do NOT apologize or say you can't help - just answer naturally."
|
|
106
|
+
),
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
# Generate multiple search keys for better retrieval
|
|
110
|
+
search_keys = self._generate_search_keys(query)
|
|
111
|
+
logger.info(f"Generated {len(search_keys)} search keys for query")
|
|
112
|
+
|
|
113
|
+
# Try each search key and aggregate results
|
|
114
|
+
all_chunks = []
|
|
115
|
+
all_scores = []
|
|
116
|
+
|
|
117
|
+
# Debug information collection
|
|
118
|
+
debug_info = (
|
|
119
|
+
{
|
|
120
|
+
"search_keys": search_keys,
|
|
121
|
+
"embedding_retrieval": [],
|
|
122
|
+
"keyword_retrieval": [],
|
|
123
|
+
"total_chunks_before_dedup": 0,
|
|
124
|
+
"total_chunks_after_dedup": 0,
|
|
125
|
+
}
|
|
126
|
+
if hasattr(self, "debug") and self.debug
|
|
127
|
+
else None
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# First, use embedding-based retrieval
|
|
131
|
+
                for search_key in search_keys:
                    try:
                        # Use RAG to retrieve chunks
                        # pylint: disable=protected-access
                        chunks, scores = self.rag._retrieve_chunks(search_key)
                        if chunks:
                            all_chunks.extend(chunks)
                            all_scores.extend(scores)

                            # Capture debug info with full chunk content and indices
                            if debug_info:
                                # Get global indices for these chunks
                                chunk_global_indices = []
                                for chunk in chunks[:5]:
                                    try:
                                        idx = self.rag.chunks.index(chunk)
                                        chunk_global_indices.append(idx)
                                    except ValueError:
                                        chunk_global_indices.append(-1)

                                debug_info["embedding_retrieval"].append(
                                    {
                                        "search_key": search_key,
                                        "chunks_found": len(chunks),
                                        "chunk_indices": chunk_global_indices,  # Which chunks
                                        "scores": [float(s) for s in scores[:5]],  # Top 5 scores
                                        "top_chunk_preview": chunks[0][:200] if chunks else None,
                                        "all_chunks": (
                                            [
                                                {
                                                    "global_index": (
                                                        chunk_global_indices[i]
                                                        if i < len(chunk_global_indices)
                                                        else -1
                                                    ),
                                                    "content": chunk[:500],  # First 500 chars
                                                    "score": float(scores[i]) if i < len(scores) else 0,
                                                    "full_length": len(chunk),
                                                }
                                                for i, chunk in enumerate(chunks[:5])  # Top 5 chunks
                                            ]
                                            if chunks
                                            else []
                                        ),
                                    }
                                )
                                logger.info(
                                    f"[DEBUG] Embedding search '{search_key}': found {len(chunks)} chunks (indices: {chunk_global_indices})"
                                )
                    except Exception as e:
                        logger.warning(f"Search key '{search_key}' failed: {e}")
                        if debug_info:
                            debug_info["embedding_retrieval"].append(
                                {"search_key": search_key, "error": str(e)}
                            )
                        continue

                # HYBRID SEARCH: Boost scores of chunks containing keywords
                # Instead of creating new text snippets, we boost the scores of existing chunks
                query_lower = query.lower()

                # Identify important terms (not common words)
                query_words = re.findall(r"\b[a-z]+\b", query_lower)
                stop_words = {"the", "is", "what", "of", "and", "a", "an", "in", "to", "for"}
                important_terms = [w for w in query_words if w not in stop_words and len(w) > 2]

                keyword_boost_info = []

                if important_terms:
                    # Check each indexed chunk for keyword matches
                    for chunk_idx, chunk_text in enumerate(self.rag.chunks):
                        chunk_lower = chunk_text.lower()

                        # Count matching terms in this chunk (whole word matching)
                        matching_terms = []
                        for term in important_terms:
                            # Use word boundary regex for whole-word matching
                            if re.search(r"\b" + re.escape(term) + r"\b", chunk_lower):
                                matching_terms.append(term)

                        if matching_terms:
                            # Calculate boost score based on match ratio
                            match_ratio = (
                                len(matching_terms) / len(important_terms)
                                if important_terms
                                else 0
                            )
                            boost_score = 0.6 + (0.2 * match_ratio)  # Range: 0.6-0.8

                            # Add this chunk with boosted score if not already in all_chunks
                            if chunk_text not in all_chunks:
                                all_chunks.append(chunk_text)
                                all_scores.append(boost_score)

                                # Get source file for this chunk
                                source_file = self.rag.chunk_to_file.get(chunk_idx, "Unknown")

                                keyword_boost_info.append(
                                    {
                                        "chunk_index": chunk_idx,
                                        "source_file": (
                                            Path(source_file).name
                                            if source_file != "Unknown"
                                            else "Unknown"
                                        ),
                                        "matching_terms": matching_terms,
                                        "boost_score": boost_score,
                                        "match_ratio": match_ratio,
                                    }
                                )

                                # Limit boosted chunks
                                if len(keyword_boost_info) >= 5:
                                    break

                # Capture debug info for keyword boosting
                if debug_info and keyword_boost_info:
                    debug_info["keyword_retrieval"].append(
                        {
                            "chunks_boosted": len(keyword_boost_info),
                            "boosted_chunks": keyword_boost_info,
                        }
                    )
                    logger.info(
                        f"[DEBUG] Keyword search: boosted {len(keyword_boost_info)} chunks"
                    )
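
                # Worked example (illustrative terms, not from a real query): if the
                # important terms are, say, ["warranty", "period", "refund"] and a
                # chunk matches two of the three, then match_ratio = 2/3 and
                # boost_score = 0.6 + 0.2 * (2/3) ≈ 0.73, so keyword-only hits always
                # land in the 0.6-0.8 band and can be ranked directly alongside the
                # embedding similarity scores.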

                # Update debug info before deduplication - track which chunks before dedup
                if debug_info:
                    debug_info["total_chunks_before_dedup"] = len(all_chunks)
                    # Show which chunks were found before deduplication
                    all_chunk_indices = []
                    for chunk in all_chunks:
                        try:
                            idx = self.rag.chunks.index(chunk)
                            all_chunk_indices.append(idx)
                        except ValueError:
                            all_chunk_indices.append("keyword_context")  # Keyword match, not a full chunk
                    debug_info["chunks_before_dedup_indices"] = all_chunk_indices
                    debug_info["deduplication_note"] = (
                        "Removes chunks that appear in both embedding and keyword results, keeping the one with higher score"
                    )

                if not all_chunks:
                    result = {
                        "status": "success",
                        "message": "No relevant information found in indexed documents.",
                        "chunks": [],
                        "num_chunks": 0,
                        "relevance_scores": [],
                        "instruction": "Inform the user that no relevant information was found in the documents for their query.",
                    }
                    if debug_info:
                        result["debug_info"] = debug_info
                    return result

                # Remove duplicate chunks and keep best scores
                # OPTIMIZED: Use hash-based deduplication instead of full text comparison
                unique_chunks = {}  # {chunk_hash: (chunk_text, score)}

                for chunk, score in zip(all_chunks, all_scores):
                    # Use hash for O(1) lookup instead of O(N) string comparison
                    chunk_hash = hash(chunk)

                    if chunk_hash not in unique_chunks or unique_chunks[chunk_hash][1] < score:
                        unique_chunks[chunk_hash] = (chunk, score)

                # Update debug info after deduplication - track which chunks remain
                if debug_info:
                    debug_info["total_chunks_after_dedup"] = len(unique_chunks)
                    debug_info["duplicates_removed"] = debug_info["total_chunks_before_dedup"] - len(unique_chunks)
                    # Show which chunks remain after deduplication
                    dedup_chunk_indices = []
                    for chunk_text, score in unique_chunks.values():
                        try:
                            idx = self.rag.chunks.index(chunk_text)
                            dedup_chunk_indices.append(idx)
                        except ValueError:
                            dedup_chunk_indices.append("keyword_context")
                    debug_info["chunks_after_dedup_indices"] = dedup_chunk_indices

                # Sort by score and take top chunks
                sorted_items = sorted(unique_chunks.values(), key=lambda x: x[1], reverse=True)
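
                # Note: keying on hash(chunk) means two entries with identical text
                # collapse to one, keeping the higher score. For example, a chunk
                # retrieved by two different search keys at 0.55 and 0.72 survives
                # deduplication once, with score 0.72.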

                # Adaptive max_chunks: use more chunks for larger documents
                # With 32K context, we can afford to retrieve more chunks for better coverage
                total_chunks = len(self.rag.chunks)
                if total_chunks > 200:
                    adaptive_max = min(25, self.max_chunks * 5)  # Up to 25 chunks for very large docs (200+ pages)
                elif total_chunks > 100:
                    adaptive_max = min(20, self.max_chunks * 4)  # Up to 20 chunks for large docs (100+ pages)
                elif total_chunks > 50:
                    adaptive_max = min(10, self.max_chunks * 2)  # Up to 10 chunks for medium docs
                else:
                    adaptive_max = self.max_chunks  # Default (5) for small docs
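
                # Illustrative values with the default max_chunks of 5: an index of
                # 300 chunks gives adaptive_max = min(25, 25) = 25, 150 chunks gives
                # min(20, 20) = 20, 80 chunks gives min(10, 10) = 10, and anything
                # up to 50 chunks stays at the default of 5.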

                top_chunks = [chunk for chunk, score in sorted_items[:adaptive_max]]
                top_scores = [score for chunk, score in sorted_items[:adaptive_max]]

                # Find the actual chunk indices from the RAG system
                chunk_indices = []
                for chunk in top_chunks:
                    # Find this chunk's index in the global chunks list
                    try:
                        idx = self.rag.chunks.index(chunk)
                        chunk_indices.append(idx)
                    except ValueError:
                        chunk_indices.append(-1)  # Not found

                # Format chunks with context markers for better readability
                formatted_chunks = []
                for i, chunk in enumerate(top_chunks):
                    formatted_chunks.append(
                        {
                            "chunk_id": i + 1,  # Sequential for display
                            "page": extract_page_from_chunk(
                                chunk,
                                chunk_indices[i] if i < len(chunk_indices) else -1,
                                self.rag.chunks,
                            ),  # PDF page (with lookback)
                            "content": chunk,
                            "relevance_score": float(top_scores[i]),
                            "_debug_chunk_index": (
                                chunk_indices[i] if i < len(chunk_indices) else -1
                            ),  # Internal index (for debugging)
                        }
                    )
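
                # The "page" field comes from extract_page_from_chunk which, as used
                # here, appears to read a "[Page N]" marker from the chunk and, when
                # the chunk has none, look back through earlier chunks for the most
                # recent marker - hence the "with lookback" note above. Markers of
                # this form are what the page-splitting logic in summarize_document
                # below also relies on.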

                # Update debug info with final chunks
                if debug_info:
                    debug_info["final_chunks_returned"] = len(top_chunks)
                    debug_info["score_distribution"] = {
                        "max": float(max(top_scores)) if top_scores else 0,
                        "min": float(min(top_scores)) if top_scores else 0,
                        "avg": float(sum(top_scores) / len(top_scores)) if top_scores else 0,
                    }
                    # Add preview of returned chunks
                    debug_info["chunks_preview"] = [
                        {
                            "chunk_id": c["chunk_id"],
                            "score": c["relevance_score"],
                            "preview": (
                                c["content"][:100] + "..." if len(c["content"]) > 100 else c["content"]
                            ),
                        }
                        for c in formatted_chunks[:3]  # Show first 3 chunks
                    ]

                # Return chunks for agent to use in answer generation
                result = {
                    "status": "success",
                    "message": f"Found {len(top_chunks)} relevant document chunks",
                    "chunks": formatted_chunks,
                    "num_chunks": len(top_chunks),
                    "search_keys_used": search_keys,
                    "source_files": (
                        list(
                            set(
                                self.rag.chunk_to_file.get(i, "Unknown")
                                for i in range(len(self.rag.chunks))
                            )
                        )
                        if hasattr(self.rag, "chunk_to_file")
                        else []
                    ),
                    "instruction": "Use the provided document chunks to answer the user's question.\n\nCRITICAL CITATION REQUIREMENT:\nYour answer MUST start with: 'According to [document name], page X:' where X is the page number from each chunk's 'page' field.\n\nExample: If chunk has 'page': 2, say 'According to document.pdf, page 2:'\nIf info from pages 2 and 5, say 'According to document.pdf, pages 2 and 5:'",
                }

                # Add debug info to result if debug mode is enabled
                if debug_info:
                    result["debug_info"] = debug_info
                    logger.info(
                        f"[DEBUG] Query complete: {debug_info['final_chunks_returned']} chunks returned from {debug_info['total_chunks_before_dedup']} total ({debug_info['duplicates_removed']} duplicates removed)"
                    )

                return result
            except Exception as e:
                logger.error(f"Error in query_documents: {e}")
                # Graceful degradation - inform agent to use general knowledge
                return {
                    "status": "fallback",
                    "message": "Document search is temporarily unavailable",
                    "error": str(e),
                    "instruction": (
                        "The document search system encountered an error. "
                        "Please answer the user's question using your general knowledge "
                        "and inform them that document search is unavailable."
                    ),
                    "fallback_response": (
                        "I apologize, but I'm currently unable to search the indexed documents. "
                        "Let me try to answer your question based on my general knowledge instead."
                    ),
                }
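
        # Illustrative call sequence (hypothetical values): an agent invoking
        # query_documents("what is the warranty period") would receive a dict like
        #   {"status": "success", "num_chunks": 5,
        #    "chunks": [{"chunk_id": 1, "page": 2, "content": "...", ...}, ...]}
        # and is expected to open its answer with "According to <file>, page 2:",
        # per the instruction field above.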

        @tool(
            name="query_specific_file",
            description="Query a SPECIFIC file by name for targeted, fast retrieval. Use when user mentions a specific file or needs information from one document.",
            parameters={
                "file_path": {
                    "type": "str",
                    "description": "Name or path of the specific file to query (e.g., 'document.pdf' or full path)",
                    "required": True,
                },
                "query": {
                    "type": "str",
                    "description": "Question to ask about this specific file",
                    "required": True,
                },
            },
        )
        def query_specific_file(file_path: str, query: str) -> Dict[str, Any]:
            """
            Query a specific file for fast, targeted retrieval.

            This is faster than query_documents because it searches only one file.
            """
            try:
                # Debug information collection
                debug_info = (
                    {
                        "tool": "query_specific_file",
                        "file_path": file_path,
                        "query": query,
                        "search_keys": [],
                        "embedding_retrieval": [],
                        "keyword_retrieval": [],
                        "total_chunks_before_dedup": 0,
                        "total_chunks_after_dedup": 0,
                    }
                    if hasattr(self, "debug") and self.debug
                    else None
                )

                # Find the file in indexed files
                matching_files = [f for f in self.rag.indexed_files if file_path in str(f)]

                if not matching_files:
                    return {
                        "status": "error",
                        "error": f"File '{file_path}' not found in indexed documents. Use search_files to find it first.",
                    }

                # For now, use the first match
                # TODO: Let user disambiguate if multiple matches
                target_file = matching_files[0]

                # Generate search keys for better retrieval
                search_keys = self._generate_search_keys(query)

                if debug_info:
                    debug_info["search_keys"] = search_keys
                    debug_info["target_file"] = str(target_file)
                    logger.info(
                        f"[DEBUG] query_specific_file: Searching '{Path(target_file).name}' with {len(search_keys)} search keys"
                    )

                # Use per-file retrieval for efficient search
                all_chunks = []
                all_scores = []

                # Add fields for hybrid search debug info
                if debug_info:
                    debug_info["embedding_retrieval"] = []
                    debug_info["keyword_retrieval"] = []

                # First, do embedding-based retrieval
                for search_key in search_keys:
                    try:
                        # Use the new per-file retrieval method
                        # pylint: disable=protected-access
                        chunks, scores = self.rag._retrieve_chunks_from_file(
                            search_key, str(target_file)
                        )
                        if chunks:
                            all_chunks.extend(chunks)
                            all_scores.extend(scores)

                            # Capture debug info with full chunk content and indices
                            if debug_info:
                                # Get global indices for these chunks
                                chunk_global_indices = []
                                for chunk in chunks[:5]:
                                    try:
                                        idx = self.rag.chunks.index(chunk)
                                        chunk_global_indices.append(idx)
                                    except ValueError:
                                        chunk_global_indices.append(-1)

                                debug_info["embedding_retrieval"].append(
                                    {
                                        "search_key": search_key,
                                        "chunks_found": len(chunks),
                                        "chunk_indices": chunk_global_indices,  # Which chunks
                                        "scores": [float(s) for s in scores[:5]],  # Top 5 scores
                                        "top_chunk_preview": chunks[0][:100] if chunks else None,
                                        "all_chunks": (
                                            [
                                                {
                                                    "global_index": (
                                                        chunk_global_indices[i]
                                                        if i < len(chunk_global_indices)
                                                        else -1
                                                    ),
                                                    "content": chunk[:500],  # First 500 chars
                                                    "score": float(scores[i]) if i < len(scores) else 0,
                                                    "full_length": len(chunk),
                                                }
                                                for i, chunk in enumerate(chunks[:5])  # Top 5 chunks
                                            ]
                                            if chunks
                                            else []
                                        ),
                                    }
                                )
                    except Exception as e:
                        logger.warning(f"Search key '{search_key}' failed: {e}")
                        if debug_info:
                            debug_info["embedding_retrieval"].append(
                                {"search_key": search_key, "error": str(e)}
                            )
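
                # Per-file retrieval restricts the similarity search to this file's
                # own chunks, so cost scales with the size of the target document
                # rather than with everything indexed - presumably why this tool is
                # documented as faster than query_documents.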

                # HYBRID SEARCH: Boost scores of chunks containing keywords
                # Instead of creating new text snippets, we boost the scores of existing chunks
                if (
                    str(target_file) in self.rag.file_metadata
                    and "full_text" in self.rag.file_metadata[str(target_file)]
                ):
                    query_lower = query.lower()

                    # Identify important terms (not common words)
                    query_words = re.findall(r"\b[a-z]+\b", query_lower)
                    # Filter out common words
                    stop_words = {"the", "is", "what", "of", "and", "a", "an", "in", "to", "for"}
                    important_terms = [w for w in query_words if w not in stop_words and len(w) > 2]

                    if important_terms:
                        file_keyword_info = []

                        # Check each chunk from this file for keyword matches
                        file_chunk_indices = self.rag.file_to_chunk_indices.get(str(target_file), [])

                        for chunk_idx in file_chunk_indices:
                            if chunk_idx < len(self.rag.chunks):
                                chunk_text = self.rag.chunks[chunk_idx].lower()

                                # Count matching terms in this chunk (whole word matching)
                                matching_terms = []
                                for term in important_terms:
                                    # Use word boundary regex for whole-word matching
                                    if re.search(r"\b" + re.escape(term) + r"\b", chunk_text):
                                        matching_terms.append(term)

                                if matching_terms:
                                    # Calculate boost score based on match ratio
                                    match_ratio = (
                                        len(matching_terms) / len(important_terms)
                                        if important_terms
                                        else 0
                                    )
                                    boost_score = 0.6 + (0.2 * match_ratio)  # Range: 0.6-0.8

                                    # Add this chunk with boosted score if not already in all_chunks
                                    chunk_content = self.rag.chunks[chunk_idx]
                                    if chunk_content not in all_chunks:
                                        all_chunks.append(chunk_content)
                                        all_scores.append(boost_score)

                                        file_keyword_info.append(
                                            {
                                                "chunk_index": chunk_idx,
                                                "matching_terms": matching_terms,
                                                "boost_score": boost_score,
                                                "match_ratio": match_ratio,
                                            }
                                        )

                                        # Limit boosted chunks
                                        if len(file_keyword_info) >= 5:
                                            break

                        # Capture debug info for keyword search
                        if debug_info and file_keyword_info:
                            debug_info["keyword_retrieval"].append(
                                {
                                    "file": Path(target_file).name,
                                    "chunks_boosted": len(file_keyword_info),
                                    "boosted_chunks": file_keyword_info[:5],  # Show first 5 boosted chunks
                                }
                            )
                            logger.info(
                                f"[DEBUG] Keyword search in {Path(target_file).name}: boosted {len(file_keyword_info)} chunks"
                            )
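
                # Note that the keyword-boost pass above is gated on cached
                # "full_text" metadata, so a file whose metadata lacks cached text
                # falls back to purely embedding-based results here.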

                # Update debug info before deduplication - track which chunks before dedup
                if debug_info:
                    debug_info["total_chunks_before_dedup"] = len(all_chunks)
                    # Show which chunks were found before deduplication
                    all_chunk_indices = []
                    for chunk in all_chunks:
                        try:
                            idx = self.rag.chunks.index(chunk)
                            all_chunk_indices.append(idx)
                        except ValueError:
                            all_chunk_indices.append("keyword_context")  # Keyword match context, not a full indexed chunk
                    debug_info["chunks_before_dedup_indices"] = all_chunk_indices
                    debug_info["deduplication_note"] = (
                        "Removes duplicate chunks found by both embedding and keyword search, keeping the version with higher score"
                    )

                if not all_chunks:
                    result = {
                        "status": "success",
                        "message": f"No relevant information found in {Path(target_file).name}",
                        "chunks": [],
                        "file": str(target_file),
                    }
                    if debug_info:
                        result["debug_info"] = debug_info
                    return result

                # Remove duplicates and sort using hash-based deduplication
                unique_chunks = {}  # {chunk_hash: (chunk_text, score)}

                for chunk, score in zip(all_chunks, all_scores):
                    chunk_hash = hash(chunk)
                    if chunk_hash not in unique_chunks or unique_chunks[chunk_hash][1] < score:
                        unique_chunks[chunk_hash] = (chunk, score)

                # Update debug info after deduplication - track which chunks remain
                if debug_info:
                    debug_info["total_chunks_after_dedup"] = len(unique_chunks)
                    debug_info["duplicates_removed"] = debug_info["total_chunks_before_dedup"] - len(unique_chunks)
                    # Show which chunks remain after deduplication
                    dedup_chunk_indices = []
                    for chunk_text, score in unique_chunks.values():
                        try:
                            idx = self.rag.chunks.index(chunk_text)
                            dedup_chunk_indices.append(idx)
                        except ValueError:
                            dedup_chunk_indices.append("keyword_context")
                    debug_info["chunks_after_dedup_indices"] = dedup_chunk_indices

                sorted_items = sorted(unique_chunks.values(), key=lambda x: x[1], reverse=True)

                # Adaptive max_chunks: use more chunks for larger documents
                # With 32K context, we can afford to retrieve more chunks for better coverage
                total_chunks = len(self.rag.chunks)
                if total_chunks > 200:
                    adaptive_max = min(25, self.max_chunks * 5)  # Up to 25 chunks for very large docs (200+ pages)
                elif total_chunks > 100:
                    adaptive_max = min(20, self.max_chunks * 4)  # Up to 20 chunks for large docs (100+ pages)
                elif total_chunks > 50:
                    adaptive_max = min(10, self.max_chunks * 2)  # Up to 10 chunks for medium docs
                else:
                    adaptive_max = self.max_chunks  # Default (5) for small docs

                top_chunks = [chunk for chunk, score in sorted_items[:adaptive_max]]
                top_scores = [score for chunk, score in sorted_items[:adaptive_max]]

                # Update debug info with final chunks
                if debug_info:
                    debug_info["final_chunks_returned"] = len(top_chunks)
                    debug_info["score_distribution"] = {
                        "max": float(max(top_scores)) if top_scores else 0,
                        "min": float(min(top_scores)) if top_scores else 0,
                        "avg": float(sum(top_scores) / len(top_scores)) if top_scores else 0,
                    }
                    logger.info(
                        f"[DEBUG] query_specific_file complete: {debug_info['final_chunks_returned']} chunks returned from {debug_info['total_chunks_before_dedup']} total"
                    )

                # Find the actual chunk indices from the RAG system
                chunk_indices = []
                for chunk in top_chunks:
                    # Find this chunk's index in the global chunks list
                    try:
                        idx = self.rag.chunks.index(chunk)
                        chunk_indices.append(idx)
                    except ValueError:
                        chunk_indices.append(-1)  # Not found

                formatted_chunks = [
                    {
                        "chunk_id": i + 1,  # Sequential for display
                        "page": extract_page_from_chunk(
                            chunk,
                            chunk_indices[i] if i < len(chunk_indices) else -1,
                            self.rag.chunks,
                        ),  # PDF page (with lookback)
                        "content": chunk,
                        "relevance_score": float(score),
                        "_debug_chunk_index": (
                            chunk_indices[i] if i < len(chunk_indices) else -1
                        ),  # Internal index (for debugging)
                    }
                    for i, (chunk, score) in enumerate(zip(top_chunks, top_scores))
                ]

                result = {
                    "status": "success",
                    "message": f"Found {len(top_chunks)} relevant chunks in {Path(target_file).name}",
                    "chunks": formatted_chunks,
                    "file": str(target_file),
                    "instruction": f"Use these chunks from {Path(target_file).name} to answer the question. Read through ALL {len(top_chunks)} chunks completely before answering.\n\nCRITICAL CITATION REQUIREMENT:\nYour answer MUST start with: 'According to {Path(target_file).name}, page X:' where X is the page number from the chunk's 'page' field.\n\nExample: If chunk has 'page': 2, say 'According to {Path(target_file).name}, page 2:'\nIf info from multiple pages, say 'According to {Path(target_file).name}, pages 2 and 5:'",
                }

                # Add debug info to result if debug mode is enabled
                if debug_info:
                    result["debug_info"] = debug_info

                return result

            except Exception as e:
                logger.error(f"Error in query_specific_file: {e}")
                # Graceful degradation
                return {
                    "status": "fallback",
                    "message": f"Unable to search in {file_path}",
                    "error": str(e),
                    "instruction": (
                        f"Could not search in the specific file '{file_path}'. "
                        "Inform the user about this issue and offer to help with general knowledge."
                    ),
                    "fallback_response": (
                        f"I encountered an error while trying to search in '{file_path}'. "
                        "The file might not be properly indexed or there was a technical issue. "
                        "Would you like me to try answering based on general knowledge instead?"
                    ),
                }

        @tool(
            name="search_indexed_chunks",
            description="Search for exact text patterns within RAG-indexed document chunks. Use for finding specific phrases in indexed documents.",
            parameters={
                "pattern": {
                    "type": "str",
                    "description": "Text pattern or keyword to search for",
                    "required": True,
                },
            },
        )
        def search_indexed_chunks(pattern: str) -> Dict[str, Any]:
            """
            Search for exact text patterns in RAG-indexed chunks.

            Searches in-memory RAG chunks, not files on disk.
            Faster than semantic RAG for exact matches.
            """
            try:
                # Debug information collection
                debug_info = (
                    {
                        "tool": "search_indexed_chunks",
                        "pattern": pattern,
                        "total_chunks_searched": 0,
                        "matches_found": 0,
                        "chunks_with_matches": [],
                    }
                    if hasattr(self, "debug") and self.debug
                    else None
                )

                if not self.rag.chunks:
                    return {"status": "error", "error": "No documents indexed."}

                # Search through chunks for pattern
                matching_chunks = []
                pattern_lower = pattern.lower()

                if debug_info:
                    debug_info["total_chunks_searched"] = len(self.rag.chunks)
                    logger.info(
                        f"[DEBUG] search_indexed_chunks: Searching for '{pattern}' in {len(self.rag.chunks)} chunks"
                    )

                for i, chunk in enumerate(self.rag.chunks):
                    if pattern_lower in chunk.lower():
                        matching_chunks.append(chunk)

                        # Capture debug info for first few matches
                        if debug_info and len(debug_info["chunks_with_matches"]) < 5:
                            # Find the line containing the pattern
                            lines = chunk.split("\n")
                            matching_lines = [
                                line for line in lines if pattern_lower in line.lower()
                            ]
                            debug_info["chunks_with_matches"].append(
                                {
                                    "chunk_index": i,
                                    "chunk_preview": (
                                        chunk[:100] + "..." if len(chunk) > 100 else chunk
                                    ),
                                    "matching_lines": matching_lines[:2],  # First 2 matching lines
                                }
                            )

                if debug_info:
                    debug_info["matches_found"] = len(matching_chunks)
                    logger.info(
                        f"[DEBUG] search_indexed_chunks complete: Found {len(matching_chunks)} matches"
                    )

                if not matching_chunks:
                    result = {
                        "status": "success",
                        "message": f"Pattern '{pattern}' not found in indexed documents",
                        "matches": [],
                        "count": 0,
                    }
                    if debug_info:
                        result["debug_info"] = debug_info
                    return result

                # Limit results
                limited_matches = matching_chunks[:10]

                result = {
                    "status": "success",
                    "message": f"Found {len(matching_chunks)} matches for '{pattern}'",
                    "matches": limited_matches,
                    "count": len(matching_chunks),
                    "showing": len(limited_matches),
                    "instruction": "Use these exact matches to answer the user's question.",
                }

                # Add debug info to result if debug mode is enabled
                if debug_info:
                    result["debug_info"] = debug_info

                return result

            except Exception as e:
                logger.error(f"Error in search_indexed_chunks: {e}")
                # Consistent error handling with graceful degradation
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "search_indexed_chunks",
                    "hint": "The text search failed. Try using query_documents for semantic search instead.",
                }
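
        # Illustrative usage (hypothetical pattern): search_indexed_chunks("Section 4.2")
        # performs a case-insensitive substring scan over every indexed chunk, so it
        # finds exact phrases that semantic retrieval might rank poorly, at a cost
        # linear in the total amount of indexed text.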

        # NOTE: search_file_content (disk-based grep) and write_file are now
        # provided by FileSearchToolsMixin from gaia.agents.tools.file_tools

        @tool(
            name="evaluate_retrieval",
            description="Evaluate if retrieved information is sufficient to answer the question. Use before providing final answer.",
            parameters={
                "question": {
                    "type": "str",
                    "description": "The original question",
                    "required": True,
                },
                "retrieved_info": {
                    "type": "str",
                    "description": "Summary of information retrieved so far",
                    "required": True,
                },
            },
        )
        def evaluate_retrieval(question: str, retrieved_info: str) -> Dict[str, Any]:
            """
            Evaluate if retrieved information sufficiently answers the question.

            Returns recommendation for next steps.
            """
            try:
                # Simple heuristic evaluation
                # In production, this could use LLM or more sophisticated metrics
                info_length = len(retrieved_info.strip())
                has_content = info_length > 50

                # Check if question keywords appear in retrieved info
                question_words = set(question.lower().split())
                info_words = set(retrieved_info.lower().split())
                keyword_overlap = len(question_words & info_words) / max(len(question_words), 1)

                is_sufficient = has_content and keyword_overlap > 0.3

                if is_sufficient:
                    return {
                        "status": "success",
                        "sufficient": True,
                        "confidence": "high" if keyword_overlap > 0.5 else "medium",
                        "recommendation": "Provide answer based on retrieved information",
                        "keyword_overlap": round(keyword_overlap, 2),
                    }
                else:
                    return {
                        "status": "success",
                        "sufficient": False,
                        "confidence": "low",
                        "recommendation": "Try query_specific_file for targeted search or search_file_content for exact matches",
                        "keyword_overlap": round(keyword_overlap, 2),
                        "issues": [
                            "Low information content" if not has_content else None,
                            "Low keyword overlap" if keyword_overlap < 0.3 else None,
                        ],
                    }

            except Exception as e:
                logger.error(f"Error in evaluate_retrieval: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "evaluate_retrieval",
                    "hint": "Failed to evaluate retrieval quality. You can proceed with answering based on retrieved chunks.",
                }
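
        # Worked example (illustrative strings): for
        #   question = "what is the warranty period"
        #   retrieved_info = "According to the manual, the standard warranty period is 2 years from purchase."
        # the retrieved text is over 50 characters, and 4 of the 5 question words
        # ({"what", "is", "the", "warranty", "period"}) appear in it, giving
        # keyword_overlap = 0.8 > 0.5, so the tool reports sufficient=True with
        # "high" confidence. The overlap is computed on raw whitespace tokens, so
        # punctuation attached to a word (e.g. "period?") would not match.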

        @tool(
            name="index_document",
            description="Add a document to the RAG index",
            parameters={
                "file_path": {
                    "type": "str",
                    "description": "Path to the document (PDF) to index",
                    "required": True,
                }
            },
        )
        def index_document(file_path: str) -> Dict[str, Any]:
            """Index a document with path validation and detailed statistics."""
            try:
                if not os.path.exists(file_path):
                    return {"status": "error", "error": f"File not found: {file_path}"}

                # Validate path with user confirmation
                if not self.session_manager.validate_path(file_path, operation="index"):
                    return {"status": "error", "error": f"Access denied: {file_path}"}

                # Index the document (now returns dict with stats)
                result = self.rag.index_document(file_path)

                if result.get("success"):
                    self.indexed_files.add(file_path)

                    # Add to current session
                    if self.current_session:
                        if file_path not in self.current_session.indexed_documents:
                            self.current_session.indexed_documents.append(file_path)
                            self.session_manager.save_session(self.current_session)

                    # Update system prompt to include the new document
                    if hasattr(self, "_update_system_prompt"):
                        self._update_system_prompt()

                    # Return detailed stats from RAG SDK
                    return {
                        "status": "success",
                        "message": f"Successfully indexed: {result.get('file_name', file_path)}",
                        "file_name": result.get("file_name"),
                        "file_type": result.get("file_type"),
                        "file_size_mb": result.get("file_size_mb"),
                        "num_pages": result.get("num_pages"),
                        "num_chunks": result.get("num_chunks"),
                        "total_indexed_files": result.get("total_indexed_files"),
                        "total_chunks": result.get("total_chunks"),
                        "from_cache": result.get("from_cache", False),
                        "already_indexed": result.get("already_indexed", False),
                        "reindexed": result.get("reindexed", False),
                    }
                else:
                    return {
                        "status": "error",
                        "error": result.get("error", f"Failed to index: {file_path}"),
                        "file_name": result.get("file_name", Path(file_path).name),
                    }
            except Exception as e:
                logger.error(f"Error indexing document: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "index_document",
                    "file": file_path,
                    "hint": "Failed to index document. Check if file exists and is readable.",
                }
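
        # The keys read from `result` above (file_name, num_pages, num_chunks,
        # from_cache, ...) sketch the stats-dict contract of the RAG SDK's
        # index_document; a cached re-index would plausibly come back as something
        # like {"success": True, "file_name": "doc.pdf", "from_cache": True, ...}.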

        @tool(
            name="list_indexed_documents",
            description="List all currently indexed documents",
            parameters={},
        )
        def list_indexed_documents() -> Dict[str, Any]:
            """List indexed documents."""
            try:
                docs = list(self.rag.indexed_files)
                return {
                    "status": "success",
                    "documents": [str(Path(d).name) for d in docs],
                    "count": len(docs),
                    "total_chunks": len(self.rag.chunks),
                }
            except Exception as e:
                logger.error(f"Error in list_indexed_documents: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "list_indexed_documents",
                }

        @tool(
            name="rag_status",
            description="Get the status of the RAG system",
            parameters={},
        )
        def rag_status() -> Dict[str, Any]:
            """Get RAG system status."""
            try:
                status = self.rag.get_status()
                return {
                    "status": "success",
                    **status,
                    "watched_directories": self.watch_directories,
                }
            except Exception as e:
                logger.error(f"Error in rag_status: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "rag_status",
                }

        @tool(
            name="summarize_document",
            description="Generate a comprehensive summary of a large indexed document by iterating through its content in sections. Best for getting an overview of lengthy documents.",
            parameters={
                "file_path": {
                    "type": "str",
                    "description": "Name or path of the document to summarize",
                    "required": True,
                },
                "summary_type": {
                    "type": "str",
                    "description": "Type of summary: 'brief' (2-3 paragraphs), 'detailed' (comprehensive with all key points), 'bullets' (key points as bullets) - default: 'detailed'",
                    "required": False,
                },
                "max_words_per_section": {
                    "type": "int",
                    "description": "Maximum words to process per section (default: 20000). Larger documents will be split into multiple sections and summarized iteratively.",
                    "required": False,
                },
            },
        )
        def summarize_document(
            file_path: str,
            summary_type: str = "detailed",
            max_words_per_section: int = 20000,
        ) -> Dict[str, Any]:
            """
            Summarize a large document by iterating through its content.

            For large documents, this will:
            1. Get the full text from cache (already extracted with VLM)
            2. Split into manageable sections based on word count
            3. Summarize each section with the LLM
            4. Combine section summaries into a final comprehensive summary
            """
            try:
                # Find the file in indexed files
                matching_files = [f for f in self.rag.indexed_files if file_path in str(f)]

                if not matching_files:
                    return {
                        "status": "error",
                        "error": f"Document '{file_path}' not found in indexed documents. Use index_document first.",
                    }

                target_file = matching_files[0]

                # Validate summary type
                valid_types = ["brief", "detailed", "bullets"]
                if summary_type not in valid_types:
                    return {
                        "status": "error",
                        "error": f"Invalid summary_type '{summary_type}'. Valid types: {', '.join(valid_types)}",
                    }

                # Get type-specific instruction
                type_instructions = {
                    "brief": "Create a concise 2-3 paragraph summary highlighting the most important points and main themes.",
                    "detailed": "Create a comprehensive summary covering all major topics, key points, and important details. Organize by sections if applicable.",
                    "bullets": "Create a bullet-point summary of the key points, organizing related items together. Use sub-bullets for details.",
                }

                summary_instruction = type_instructions[summary_type]

                # Get all chunks from the RAG index
                # Since we can't directly filter chunks by document, we'll use a workaround:
                # Extract text from the original PDF and chunk it
                logger.info(f"Summarizing document: {target_file}")

                # Use cached extracted text if available, otherwise extract
                try:
                    # Check if we have cached metadata with full_text
                    if (
                        target_file in self.rag.file_metadata
                        and "full_text" in self.rag.file_metadata[target_file]
                    ):
                        # Use cached text - no need to re-run VLM or extraction!
                        full_text = self.rag.file_metadata[target_file]["full_text"]
                        logger.debug(
                            f"Using cached extracted text for {Path(target_file).name}"
                        )
                    else:
                        # Fallback: Extract text using RAG SDK's file extraction
                        logger.warning(
                            f"No cached text found for {Path(target_file).name}, extracting..."
                        )
                        # pylint: disable=protected-access
                        full_text, _ = self.rag._extract_text_from_file(target_file)

                    if not full_text or not full_text.strip():
                        return {
                            "status": "error",
                            "error": f"No text could be extracted from {Path(target_file).name}",
                        }

                except Exception as e:
                    return {
                        "status": "error",
                        "error": f"Failed to extract text from document: {e}",
                    }
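
                # Design note: preferring the cached full_text means a document that
                # took an expensive VLM-assisted extraction to index can be
                # summarized without paying that cost again; only documents whose
                # metadata lacks full_text hit the extraction fallback.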

                # Split text into sections based on page boundaries
                # This is the simplest and most reliable semantic boundary

                # Split by page markers while keeping the markers
                page_sections = re.split(r"(\[Page \d+\])", full_text)

                # Recombine into complete pages
                pages = []
                current_page = ""

                for part in page_sections:
                    if re.match(r"\[Page \d+\]", part):
                        # This is a page marker
                        if current_page.strip():
                            pages.append(current_page.strip())
                        current_page = part + "\n"
                    else:
                        current_page += part

                # Add last page
                if current_page.strip():
                    pages.append(current_page.strip())

                # Group pages into sections that fit within max_words_per_section
                # Include overlap: last page of previous section is included in next section
                sections = []
                current_section_pages = []
                current_word_count = 0
                overlap_pages = 1  # Number of pages to overlap between sections

                for _page_idx, page in enumerate(pages):
                    page_words = len(page.split())

                    if (
                        current_word_count + page_words > max_words_per_section
                        and current_section_pages
                    ):
                        # Would exceed limit, save current section and start new with overlap
                        sections.append("\n\n".join(current_section_pages))

                        # Start new section with overlap (include last N pages from previous section)
                        overlap_start = max(0, len(current_section_pages) - overlap_pages)
                        current_section_pages = current_section_pages[overlap_start:]
                        current_word_count = sum(len(p.split()) for p in current_section_pages)

                    # Add page to current section
                    current_section_pages.append(page)
                    current_word_count += page_words

                # Add last section
                if current_section_pages:
                    sections.append("\n\n".join(current_section_pages))

                total_words = len(full_text.split())
                logger.info(
                    f"Document has {total_words} words, {len(pages)} pages, grouped into {len(sections)} sections"
                )
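
                # Illustrative grouping (hypothetical sizes): with max_words_per_section
                # = 20000 and four pages of ~8000 words each, the loop above produces
                # sections [p1, p2], [p2, p3] and [p3, p4]. Consecutive sections share
                # one page because overlap_pages = 1, which keeps content that
                # straddles a section boundary visible to both section summaries.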

                # Get document metadata for enhanced summary
                file_metadata = self.rag.file_metadata.get(target_file, {})
                num_pages = file_metadata.get("num_pages", len(pages))
                _vlm_pages = file_metadata.get("vlm_pages", 0)

                # If document is small enough (single section), summarize in one pass
                if len(sections) <= 1:
                    prompt = f"""{summary_instruction}

Document to summarize: {Path(target_file).name}

Document content:
{full_text}

Generate a well-structured summary with the following format:

# Document Summary: {Path(target_file).name}

## Document Information
- **File**: {Path(target_file).name}
- **Pages**: {num_pages}
- **Total Words**: ~{total_words:,}

## Overview
[2-3 sentence overview of what this document is]

## Key Content
[Main content organized by topics/sections - reference page numbers where applicable]

## Key Takeaways
[Bullet points of the most important points]

Use the {summary_type} style for the content sections."""

                    # Use chat SDK to generate summary
                    try:
                        # Use RAG's chat SDK for summary generation
                        response = self.rag.chat.send(prompt)
                        summary_text = response.text

                        return {
                            "status": "success",
                            "summary": summary_text,
                            "summary_type": summary_type,
                            "document": str(Path(target_file).name),
                            "total_words": total_words,
                            "sections_processed": 1,
                            "instruction": "Present the summary to the user. The summary includes document metadata, structured sections, and page references.",
                        }
                    except Exception as e:
                        logger.error(f"Error generating summary: {e}")
                        return {
                            "status": "error",
                            "error": f"Failed to generate summary: {e}",
                        }
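
                # What follows is effectively a map-reduce over the document: each
                # section is summarized independently (map), then the per-section
                # summaries are synthesized into one final summary (reduce).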

                # For long documents, iterate over sections (preserving semantic boundaries)
                section_summaries = []
                num_sections = len(sections)

                logger.info(f"Processing {num_sections} sections for summarization")

                for section_num, section_text in enumerate(sections, 1):
                    logger.info(
                        f"Summarizing section {section_num}/{num_sections} ({len(section_text.split())} words)"
                    )

                    # Generate summary for this section
                    section_prompt = f"""This is section {section_num} of {num_sections} from the document.
{summary_instruction}

Section content:
{section_text}

Generate a summary of this section:"""

                    try:
                        # Use RAG's chat SDK for section summary
                        response = self.rag.chat.send(section_prompt)
                        segment_summary = response.text

                        section_summaries.append(
                            {"section": section_num, "summary": segment_summary}
                        )
                    except Exception as e:
                        logger.warning(f"Failed to summarize segment {section_num}: {e}")
                        continue

                # Combine section summaries into final summary
                if not section_summaries:
                    return {
                        "status": "error",
                        "error": "Failed to generate any section summaries",
                    }

                # Final synthesis prompt with structured format
                combined_text = "\n\n".join(
                    [
                        f"Section {s['section']} Summary:\n{s['summary']}"
                        for s in section_summaries
                    ]
                )

                final_prompt = f"""You have summaries of {len(section_summaries)} sections from the document: {Path(target_file).name}

Section summaries:
{combined_text}

Synthesize these into a single, well-structured summary using this format:

# Document Summary: {Path(target_file).name}

## Document Information
- **File**: {Path(target_file).name}
- **Pages**: {num_pages}
- **Total Words**: ~{total_words:,}
- **Sections Processed**: {len(section_summaries)}

## Overview
[2-3 sentence overview synthesizing all sections]

## Key Content
[Main content organized by topics - consolidate from all section summaries, reference page numbers]

## Key Takeaways
[Bullet points of the most important points from across all sections]

Use the {summary_type} style. Ensure page references from section summaries are preserved."""

                try:
                    # Use RAG's chat SDK for final summary synthesis
                    response = self.rag.chat.send(final_prompt)
                    final_summary = response.text

                    return {
                        "status": "success",
                        "summary": final_summary,
                        "summary_type": summary_type,
                        "document": str(Path(target_file).name),
                        "total_words": total_words,
                        "sections_processed": len(section_summaries),
                        "section_summaries": section_summaries,
                        "instruction": "Present the formatted summary to the user. The summary includes document metadata, organized sections with page references, and key takeaways.",
                    }
                except Exception as e:
                    logger.error(f"Error synthesizing final summary: {e}")
                    # Return segment summaries as fallback
                    return {
                        "status": "partial",
                        "message": "Could not synthesize final summary, returning segment summaries",
                        "summary_style": summary_type,
                        "document": str(Path(target_file).name),
                        "total_words": total_words,
                        "iterations": len(section_summaries),
                        "segment_summaries": section_summaries,
                    }

            except Exception as e:
                logger.error(f"Error in summarize_document: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "summarize_document",
                    # Report the requested file_path here: target_file may not have
                    # been assigned yet if the failure occurred before the lookup.
                    "file": file_path,
                    "hint": "Failed to generate summary. Try using query_documents to get specific information instead.",
                }

        # NOTE: search_file and search_directory tools are now provided by
        # FileSearchToolsMixin from gaia.agents.tools.file_tools
        # This provides shared file search functionality across all agents

        @tool(
            name="dump_document",
            description="Export the cached extracted text from an indexed document to a markdown file. Useful for reviewing extracted content or debugging.",
            parameters={
                "file_name": {
                    "type": "str",
                    "description": "Name or path of the indexed document to dump",
                    "required": True,
                },
                "output_path": {
                    "type": "str",
                    "description": "Output path for the markdown file (optional, defaults to {cache_dir}/{filename}_extracted.md)",
                    "required": False,
                },
            },
        )
        def dump_document(file_name: str, output_path: str = None) -> Dict[str, Any]:
            """
            Export cached extracted text from an indexed document.

            This uses the cached full_text from file_metadata, avoiding re-extraction.
            """
            try:
                # Find the file in indexed files
                matching_files = [f for f in self.rag.indexed_files if file_name in str(f)]

                if not matching_files:
                    return {
                        "status": "error",
                        "error": f"Document '{file_name}' not found in indexed documents.",
                        "hint": "Use list_indexed_documents to see available documents.",
                    }

                target_file = matching_files[0]

                # Get cached text from metadata
                if target_file not in self.rag.file_metadata:
                    return {
                        "status": "error",
                        "error": f"No cached metadata found for {Path(target_file).name}",
                        "hint": "Document may need to be re-indexed.",
                    }

                metadata = self.rag.file_metadata[target_file]
                full_text = metadata.get("full_text", "")

                if not full_text:
                    return {
                        "status": "error",
                        "error": f"No extracted text found in cache for {Path(target_file).name}",
                    }

                # Determine output path
                if output_path is None:
                    output_filename = Path(target_file).stem + "_extracted.md"
                    output_path = os.path.join(self.rag.config.cache_dir, output_filename)
                else:
                    output_path = str(Path(output_path).resolve())

                # Write markdown file with metadata header
                markdown_content = f"""# Extracted Text from {Path(target_file).name}

**Source File:** {target_file}
**Extraction Date:** {metadata.get('index_time', 'Unknown')}
**Pages:** {metadata.get('num_pages', 'N/A')}
**VLM Pages:** {metadata.get('vlm_pages', 0)}
**Total Images:** {metadata.get('total_images', 0)}

---

{full_text}
"""

                # Ensure output directory exists
                os.makedirs(
                    os.path.dirname(output_path) if os.path.dirname(output_path) else ".",
                    exist_ok=True,
                )

                with open(output_path, "w", encoding="utf-8") as f:
                    f.write(markdown_content)

                return {
                    "status": "success",
                    "output_path": output_path,
                    "text_length": len(full_text),
                    "num_pages": metadata.get("num_pages", "N/A"),
                    "vlm_pages": metadata.get("vlm_pages", 0),
                    "message": f"Exported extracted text to {output_path}",
                }

            except Exception as e:
                logger.error(f"Error dumping document: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "dump_document",
                }

        @tool(
            name="index_directory",
            description="Index all supported files in a directory. Supports PDF, TXT, CSV, JSON, and code files.",
            parameters={
                "directory_path": {
                    "type": "str",
                    "description": "Path to directory to index",
                    "required": True,
                },
                "recursive": {
                    "type": "bool",
                    "description": "Whether to recursively index subdirectories (default: False)",
                    "required": False,
                },
            },
        )
        def index_directory(directory_path: str, recursive: bool = False) -> Dict[str, Any]:
            """
            Index all supported files in a directory.

            Returns statistics about indexed files.
            """
            try:
                dir_path = Path(directory_path).resolve()

                if not dir_path.exists():
                    return {
                        "status": "error",
                        "error": f"Directory does not exist: {directory_path}",
                        "has_errors": True,
                    }

                if not dir_path.is_dir():
                    return {
                        "status": "error",
                        "error": f"Path is not a directory: {directory_path}",
                        "has_errors": True,
                    }

                logger.info(f"Indexing directory: {dir_path} (recursive={recursive})")

                # Supported file extensions
                supported_extensions = {
                    ".pdf", ".txt", ".csv", ".json", ".py", ".js",
                    ".java", ".cpp", ".c", ".h", ".md",
                }

                indexed_files = []
                failed_files = []
                skipped_files = []

                # Get files to index
                if recursive:
                    files_to_index = [f for f in dir_path.rglob("*") if f.is_file()]
                else:
                    files_to_index = [f for f in dir_path.iterdir() if f.is_file()]

                for file_path in files_to_index:
                    if file_path.suffix.lower() in supported_extensions:
                        try:
                            # Use the RAG SDK to index the file; it returns a stats
                            # dict, so check the "success" flag rather than relying
                            # on truthiness (a non-empty error dict is also truthy).
                            result = self.rag.index_document(str(file_path))
                            if result.get("success"):
                                indexed_files.append(str(file_path))
                                logger.info(f"Indexed: {file_path.name}")
                            else:
                                failed_files.append(str(file_path))
                        except Exception as e:
                            logger.warning(f"Failed to index {file_path}: {e}")
                            failed_files.append(str(file_path))
                    else:
                        skipped_files.append(str(file_path))

                # Update system prompt after indexing directory
                if indexed_files and hasattr(self, "_update_system_prompt"):
                    self._update_system_prompt()

                return {
                    "status": "success",
                    "indexed_count": len(indexed_files),
                    "failed_count": len(failed_files),
                    "skipped_count": len(skipped_files),
                    "indexed_files": [Path(f).name for f in indexed_files],
                    "failed_files": (
                        [Path(f).name for f in failed_files] if failed_files else []
                    ),
                    "message": f"Indexed {len(indexed_files)} files from {dir_path.name}",
                }

            except Exception as e:
                logger.error(f"Error indexing directory: {e}")
                return {
                    "status": "error",
                    "error": str(e),
                    "has_errors": True,
                    "operation": "index_directory",
                }
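
        # Illustrative usage (hypothetical path): index_directory("./docs", recursive=True)
        # walks every file under ./docs, indexes the supported extensions listed
        # above, and reports indexed/failed/skipped counts; unsupported files such
        # as images are counted as skipped rather than treated as errors.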