amd-gaia 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (800) hide show
  1. amd_gaia-0.14.1.dist-info/METADATA +768 -0
  2. amd_gaia-0.14.1.dist-info/RECORD +800 -0
  3. amd_gaia-0.14.1.dist-info/WHEEL +5 -0
  4. amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
  5. amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
  6. amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
  7. gaia/__init__.py +2 -0
  8. gaia/agents/__init__.py +19 -0
  9. gaia/agents/base/__init__.py +9 -0
  10. gaia/agents/base/agent.py +2072 -0
  11. gaia/agents/base/api_agent.py +120 -0
  12. gaia/agents/base/console.py +1457 -0
  13. gaia/agents/base/mcp_agent.py +86 -0
  14. gaia/agents/base/tools.py +83 -0
  15. gaia/agents/blender/agent.py +556 -0
  16. gaia/agents/blender/agent_simple.py +135 -0
  17. gaia/agents/blender/app.py +211 -0
  18. gaia/agents/blender/app_simple.py +41 -0
  19. gaia/agents/blender/core/__init__.py +16 -0
  20. gaia/agents/blender/core/materials.py +506 -0
  21. gaia/agents/blender/core/objects.py +316 -0
  22. gaia/agents/blender/core/rendering.py +225 -0
  23. gaia/agents/blender/core/scene.py +220 -0
  24. gaia/agents/blender/core/view.py +146 -0
  25. gaia/agents/chat/__init__.py +9 -0
  26. gaia/agents/chat/agent.py +975 -0
  27. gaia/agents/chat/app.py +1058 -0
  28. gaia/agents/chat/session.py +508 -0
  29. gaia/agents/chat/tools/__init__.py +15 -0
  30. gaia/agents/chat/tools/file_tools.py +96 -0
  31. gaia/agents/chat/tools/rag_tools.py +1729 -0
  32. gaia/agents/chat/tools/shell_tools.py +436 -0
  33. gaia/agents/code/__init__.py +7 -0
  34. gaia/agents/code/agent.py +547 -0
  35. gaia/agents/code/app.py +266 -0
  36. gaia/agents/code/models.py +135 -0
  37. gaia/agents/code/orchestration/__init__.py +24 -0
  38. gaia/agents/code/orchestration/checklist_executor.py +1739 -0
  39. gaia/agents/code/orchestration/checklist_generator.py +709 -0
  40. gaia/agents/code/orchestration/factories/__init__.py +9 -0
  41. gaia/agents/code/orchestration/factories/base.py +63 -0
  42. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
  43. gaia/agents/code/orchestration/factories/python_factory.py +106 -0
  44. gaia/agents/code/orchestration/orchestrator.py +610 -0
  45. gaia/agents/code/orchestration/project_analyzer.py +391 -0
  46. gaia/agents/code/orchestration/steps/__init__.py +67 -0
  47. gaia/agents/code/orchestration/steps/base.py +188 -0
  48. gaia/agents/code/orchestration/steps/error_handler.py +314 -0
  49. gaia/agents/code/orchestration/steps/nextjs.py +828 -0
  50. gaia/agents/code/orchestration/steps/python.py +307 -0
  51. gaia/agents/code/orchestration/template_catalog.py +463 -0
  52. gaia/agents/code/orchestration/workflows/__init__.py +14 -0
  53. gaia/agents/code/orchestration/workflows/base.py +80 -0
  54. gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
  55. gaia/agents/code/orchestration/workflows/python.py +94 -0
  56. gaia/agents/code/prompts/__init__.py +11 -0
  57. gaia/agents/code/prompts/base_prompt.py +77 -0
  58. gaia/agents/code/prompts/code_patterns.py +1925 -0
  59. gaia/agents/code/prompts/nextjs_prompt.py +40 -0
  60. gaia/agents/code/prompts/python_prompt.py +109 -0
  61. gaia/agents/code/schema_inference.py +365 -0
  62. gaia/agents/code/system_prompt.py +41 -0
  63. gaia/agents/code/tools/__init__.py +42 -0
  64. gaia/agents/code/tools/cli_tools.py +1138 -0
  65. gaia/agents/code/tools/code_formatting.py +319 -0
  66. gaia/agents/code/tools/code_tools.py +769 -0
  67. gaia/agents/code/tools/error_fixing.py +1347 -0
  68. gaia/agents/code/tools/external_tools.py +180 -0
  69. gaia/agents/code/tools/file_io.py +845 -0
  70. gaia/agents/code/tools/prisma_tools.py +190 -0
  71. gaia/agents/code/tools/project_management.py +1016 -0
  72. gaia/agents/code/tools/testing.py +321 -0
  73. gaia/agents/code/tools/typescript_tools.py +122 -0
  74. gaia/agents/code/tools/validation_parsing.py +461 -0
  75. gaia/agents/code/tools/validation_tools.py +803 -0
  76. gaia/agents/code/tools/web_dev_tools.py +1744 -0
  77. gaia/agents/code/validators/__init__.py +16 -0
  78. gaia/agents/code/validators/antipattern_checker.py +241 -0
  79. gaia/agents/code/validators/ast_analyzer.py +197 -0
  80. gaia/agents/code/validators/requirements_validator.py +145 -0
  81. gaia/agents/code/validators/syntax_validator.py +171 -0
  82. gaia/agents/docker/__init__.py +7 -0
  83. gaia/agents/docker/agent.py +642 -0
  84. gaia/agents/jira/__init__.py +11 -0
  85. gaia/agents/jira/agent.py +894 -0
  86. gaia/agents/jira/jql_templates.py +299 -0
  87. gaia/agents/routing/__init__.py +7 -0
  88. gaia/agents/routing/agent.py +512 -0
  89. gaia/agents/routing/system_prompt.py +75 -0
  90. gaia/api/__init__.py +23 -0
  91. gaia/api/agent_registry.py +238 -0
  92. gaia/api/app.py +305 -0
  93. gaia/api/openai_server.py +575 -0
  94. gaia/api/schemas.py +186 -0
  95. gaia/api/sse_handler.py +370 -0
  96. gaia/apps/__init__.py +4 -0
  97. gaia/apps/llm/__init__.py +6 -0
  98. gaia/apps/llm/app.py +169 -0
  99. gaia/apps/summarize/app.py +633 -0
  100. gaia/apps/summarize/html_viewer.py +133 -0
  101. gaia/apps/summarize/pdf_formatter.py +284 -0
  102. gaia/audio/__init__.py +2 -0
  103. gaia/audio/audio_client.py +439 -0
  104. gaia/audio/audio_recorder.py +269 -0
  105. gaia/audio/kokoro_tts.py +599 -0
  106. gaia/audio/whisper_asr.py +432 -0
  107. gaia/chat/__init__.py +16 -0
  108. gaia/chat/app.py +430 -0
  109. gaia/chat/prompts.py +522 -0
  110. gaia/chat/sdk.py +1200 -0
  111. gaia/cli.py +5621 -0
  112. gaia/eval/batch_experiment.py +2332 -0
  113. gaia/eval/claude.py +542 -0
  114. gaia/eval/config.py +37 -0
  115. gaia/eval/email_generator.py +512 -0
  116. gaia/eval/eval.py +3179 -0
  117. gaia/eval/groundtruth.py +1130 -0
  118. gaia/eval/transcript_generator.py +582 -0
  119. gaia/eval/webapp/README.md +168 -0
  120. gaia/eval/webapp/node_modules/.bin/mime +16 -0
  121. gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
  122. gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
  123. gaia/eval/webapp/node_modules/.package-lock.json +865 -0
  124. gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
  125. gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
  126. gaia/eval/webapp/node_modules/accepts/README.md +140 -0
  127. gaia/eval/webapp/node_modules/accepts/index.js +238 -0
  128. gaia/eval/webapp/node_modules/accepts/package.json +47 -0
  129. gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
  130. gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
  131. gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
  132. gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
  133. gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
  134. gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
  135. gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
  136. gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
  137. gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
  138. gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
  139. gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
  140. gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
  141. gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
  142. gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
  143. gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
  144. gaia/eval/webapp/node_modules/bytes/History.md +97 -0
  145. gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
  146. gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
  147. gaia/eval/webapp/node_modules/bytes/index.js +170 -0
  148. gaia/eval/webapp/node_modules/bytes/package.json +42 -0
  149. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
  150. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
  151. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
  152. gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
  153. gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
  154. gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
  155. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
  156. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
  157. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
  158. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
  159. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
  160. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
  161. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
  162. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
  163. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
  164. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
  165. gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
  166. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
  167. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
  168. gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
  169. gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
  170. gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
  171. gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
  172. gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
  173. gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
  174. gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
  175. gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
  176. gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
  177. gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
  178. gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
  179. gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
  180. gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
  181. gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
  182. gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
  183. gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
  184. gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
  185. gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
  186. gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
  187. gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
  188. gaia/eval/webapp/node_modules/content-type/README.md +94 -0
  189. gaia/eval/webapp/node_modules/content-type/index.js +225 -0
  190. gaia/eval/webapp/node_modules/content-type/package.json +42 -0
  191. gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
  192. gaia/eval/webapp/node_modules/cookie/README.md +317 -0
  193. gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
  194. gaia/eval/webapp/node_modules/cookie/index.js +334 -0
  195. gaia/eval/webapp/node_modules/cookie/package.json +44 -0
  196. gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
  197. gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
  198. gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
  199. gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
  200. gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
  201. gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
  202. gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
  203. gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
  204. gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
  205. gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
  206. gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
  207. gaia/eval/webapp/node_modules/debug/Makefile +50 -0
  208. gaia/eval/webapp/node_modules/debug/README.md +312 -0
  209. gaia/eval/webapp/node_modules/debug/component.json +19 -0
  210. gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
  211. gaia/eval/webapp/node_modules/debug/node.js +1 -0
  212. gaia/eval/webapp/node_modules/debug/package.json +49 -0
  213. gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
  214. gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
  215. gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
  216. gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
  217. gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
  218. gaia/eval/webapp/node_modules/depd/History.md +103 -0
  219. gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
  220. gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
  221. gaia/eval/webapp/node_modules/depd/index.js +538 -0
  222. gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
  223. gaia/eval/webapp/node_modules/depd/package.json +45 -0
  224. gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
  225. gaia/eval/webapp/node_modules/destroy/README.md +63 -0
  226. gaia/eval/webapp/node_modules/destroy/index.js +209 -0
  227. gaia/eval/webapp/node_modules/destroy/package.json +48 -0
  228. gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
  229. gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
  230. gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
  231. gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
  232. gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
  233. gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
  234. gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
  235. gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
  236. gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
  237. gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
  238. gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
  239. gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
  240. gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
  241. gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
  242. gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
  243. gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
  244. gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
  245. gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
  246. gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
  247. gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
  248. gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
  249. gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
  250. gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
  251. gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
  252. gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
  253. gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
  254. gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
  255. gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
  256. gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
  257. gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
  258. gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
  259. gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
  260. gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
  261. gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
  262. gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
  263. gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
  264. gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
  265. gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
  266. gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
  267. gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
  268. gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
  269. gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
  270. gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
  271. gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
  272. gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
  273. gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
  274. gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
  275. gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
  276. gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
  277. gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
  278. gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
  279. gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
  280. gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
  281. gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
  282. gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
  283. gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
  284. gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
  285. gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
  286. gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
  287. gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
  288. gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
  289. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
  290. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
  291. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
  292. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
  293. gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
  294. gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
  295. gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
  296. gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
  297. gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
  298. gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
  299. gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
  300. gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
  301. gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
  302. gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
  303. gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
  304. gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
  305. gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
  306. gaia/eval/webapp/node_modules/etag/README.md +159 -0
  307. gaia/eval/webapp/node_modules/etag/index.js +131 -0
  308. gaia/eval/webapp/node_modules/etag/package.json +47 -0
  309. gaia/eval/webapp/node_modules/express/History.md +3656 -0
  310. gaia/eval/webapp/node_modules/express/LICENSE +24 -0
  311. gaia/eval/webapp/node_modules/express/Readme.md +260 -0
  312. gaia/eval/webapp/node_modules/express/index.js +11 -0
  313. gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
  314. gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
  315. gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
  316. gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
  317. gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
  318. gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
  319. gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
  320. gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
  321. gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
  322. gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
  323. gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
  324. gaia/eval/webapp/node_modules/express/package.json +102 -0
  325. gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
  326. gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
  327. gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
  328. gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
  329. gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
  330. gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
  331. gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
  332. gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
  333. gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
  334. gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
  335. gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
  336. gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
  337. gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
  338. gaia/eval/webapp/node_modules/fresh/README.md +119 -0
  339. gaia/eval/webapp/node_modules/fresh/index.js +137 -0
  340. gaia/eval/webapp/node_modules/fresh/package.json +46 -0
  341. gaia/eval/webapp/node_modules/fs/README.md +9 -0
  342. gaia/eval/webapp/node_modules/fs/package.json +20 -0
  343. gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
  344. gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
  345. gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
  346. gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
  347. gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
  348. gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
  349. gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
  350. gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
  351. gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
  352. gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
  353. gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
  354. gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
  355. gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
  356. gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
  357. gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
  358. gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
  359. gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
  360. gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
  361. gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
  362. gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
  363. gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
  364. gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
  365. gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
  366. gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
  367. gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
  368. gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
  369. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
  370. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
  371. gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
  372. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
  373. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
  374. gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
  375. gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
  376. gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
  377. gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
  378. gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
  379. gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
  380. gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
  381. gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
  382. gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
  383. gaia/eval/webapp/node_modules/gopd/README.md +40 -0
  384. gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
  385. gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
  386. gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
  387. gaia/eval/webapp/node_modules/gopd/index.js +15 -0
  388. gaia/eval/webapp/node_modules/gopd/package.json +77 -0
  389. gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
  390. gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
  391. gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
  392. gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
  393. gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
  394. gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
  395. gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
  396. gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
  397. gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
  398. gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
  399. gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
  400. gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
  401. gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
  402. gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
  403. gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
  404. gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
  405. gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
  406. gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
  407. gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
  408. gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
  409. gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
  410. gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
  411. gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
  412. gaia/eval/webapp/node_modules/hasown/README.md +40 -0
  413. gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
  414. gaia/eval/webapp/node_modules/hasown/index.js +8 -0
  415. gaia/eval/webapp/node_modules/hasown/package.json +92 -0
  416. gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
  417. gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
  418. gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
  419. gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
  420. gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
  421. gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
  422. gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
  423. gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
  424. gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
  425. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
  426. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
  427. gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
  428. gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
  429. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
  430. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
  431. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
  432. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
  433. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
  434. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
  435. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
  436. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
  437. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
  438. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
  439. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
  440. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
  441. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
  442. gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
  443. gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
  444. gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
  445. gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
  446. gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
  447. gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
  448. gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
  449. gaia/eval/webapp/node_modules/inherits/README.md +42 -0
  450. gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
  451. gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
  452. gaia/eval/webapp/node_modules/inherits/package.json +29 -0
  453. gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
  454. gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
  455. gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
  456. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
  457. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
  458. gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
  459. gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
  460. gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
  461. gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
  462. gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
  463. gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
  464. gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
  465. gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
  466. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
  467. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
  468. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
  469. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
  470. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
  471. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
  472. gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
  473. gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
  474. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
  475. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
  476. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
  477. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
  478. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
  479. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
  480. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
  481. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
  482. gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
  483. gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
  484. gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
  485. gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
  486. gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
  487. gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
  488. gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
  489. gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
  490. gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
  491. gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
  492. gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
  493. gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
  494. gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
  495. gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
  496. gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
  497. gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
  498. gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
  499. gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
  500. gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
  501. gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
  502. gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
  503. gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
  504. gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
  505. gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
  506. gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
  507. gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
  508. gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
  509. gaia/eval/webapp/node_modules/methods/README.md +51 -0
  510. gaia/eval/webapp/node_modules/methods/index.js +69 -0
  511. gaia/eval/webapp/node_modules/methods/package.json +36 -0
  512. gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
  513. gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
  514. gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
  515. gaia/eval/webapp/node_modules/mime/README.md +90 -0
  516. gaia/eval/webapp/node_modules/mime/cli.js +8 -0
  517. gaia/eval/webapp/node_modules/mime/mime.js +108 -0
  518. gaia/eval/webapp/node_modules/mime/package.json +44 -0
  519. gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
  520. gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
  521. gaia/eval/webapp/node_modules/mime/types.json +1 -0
  522. gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
  523. gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
  524. gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
  525. gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
  526. gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
  527. gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
  528. gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
  529. gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
  530. gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
  531. gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
  532. gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
  533. gaia/eval/webapp/node_modules/ms/index.js +152 -0
  534. gaia/eval/webapp/node_modules/ms/license.md +21 -0
  535. gaia/eval/webapp/node_modules/ms/package.json +37 -0
  536. gaia/eval/webapp/node_modules/ms/readme.md +51 -0
  537. gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
  538. gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
  539. gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
  540. gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
  541. gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
  542. gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
  543. gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
  544. gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
  545. gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
  546. gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
  547. gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
  548. gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
  549. gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
  550. gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
  551. gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
  552. gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
  553. gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
  554. gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
  555. gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
  556. gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
  557. gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
  558. gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
  559. gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
  560. gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
  561. gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
  562. gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
  563. gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
  564. gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
  565. gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
  566. gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
  567. gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
  568. gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
  569. gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
  570. gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
  571. gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
  572. gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
  573. gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
  574. gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
  575. gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
  576. gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
  577. gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
  578. gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
  579. gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
  580. gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
  581. gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
  582. gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
  583. gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
  584. gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
  585. gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
  586. gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
  587. gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
  588. gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
  589. gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
  590. gaia/eval/webapp/node_modules/path/.npmignore +1 -0
  591. gaia/eval/webapp/node_modules/path/LICENSE +18 -0
  592. gaia/eval/webapp/node_modules/path/README.md +15 -0
  593. gaia/eval/webapp/node_modules/path/package.json +24 -0
  594. gaia/eval/webapp/node_modules/path/path.js +628 -0
  595. gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
  596. gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
  597. gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
  598. gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
  599. gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
  600. gaia/eval/webapp/node_modules/process/LICENSE +22 -0
  601. gaia/eval/webapp/node_modules/process/README.md +26 -0
  602. gaia/eval/webapp/node_modules/process/browser.js +184 -0
  603. gaia/eval/webapp/node_modules/process/index.js +2 -0
  604. gaia/eval/webapp/node_modules/process/package.json +27 -0
  605. gaia/eval/webapp/node_modules/process/test.js +199 -0
  606. gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
  607. gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
  608. gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
  609. gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
  610. gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
  611. gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
  612. gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
  613. gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
  614. gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
  615. gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
  616. gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
  617. gaia/eval/webapp/node_modules/qs/README.md +709 -0
  618. gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
  619. gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
  620. gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
  621. gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
  622. gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
  623. gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
  624. gaia/eval/webapp/node_modules/qs/package.json +91 -0
  625. gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
  626. gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
  627. gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
  628. gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
  629. gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
  630. gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
  631. gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
  632. gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
  633. gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
  634. gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
  635. gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
  636. gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
  637. gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
  638. gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
  639. gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
  640. gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
  641. gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
  642. gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
  643. gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
  644. gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
  645. gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
  646. gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
  647. gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
  648. gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
  649. gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
  650. gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
  651. gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
  652. gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
  653. gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
  654. gaia/eval/webapp/node_modules/send/LICENSE +23 -0
  655. gaia/eval/webapp/node_modules/send/README.md +327 -0
  656. gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
  657. gaia/eval/webapp/node_modules/send/index.js +1142 -0
  658. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
  659. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
  660. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
  661. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
  662. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
  663. gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
  664. gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
  665. gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
  666. gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
  667. gaia/eval/webapp/node_modules/send/package.json +62 -0
  668. gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
  669. gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
  670. gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
  671. gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
  672. gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
  673. gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
  674. gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
  675. gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
  676. gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
  677. gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
  678. gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
  679. gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
  680. gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
  681. gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
  682. gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
  683. gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
  684. gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
  685. gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
  686. gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
  687. gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
  688. gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
  689. gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
  690. gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
  691. gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
  692. gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
  693. gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
  694. gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
  695. gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
  696. gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
  697. gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
  698. gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
  699. gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
  700. gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
  701. gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
  702. gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
  703. gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
  704. gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
  705. gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
  706. gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
  707. gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
  708. gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
  709. gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
  710. gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
  711. gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
  712. gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
  713. gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
  714. gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
  715. gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
  716. gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
  717. gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
  718. gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
  719. gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
  720. gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
  721. gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
  722. gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
  723. gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
  724. gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
  725. gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
  726. gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
  727. gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
  728. gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
  729. gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
  730. gaia/eval/webapp/node_modules/statuses/README.md +136 -0
  731. gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
  732. gaia/eval/webapp/node_modules/statuses/index.js +146 -0
  733. gaia/eval/webapp/node_modules/statuses/package.json +49 -0
  734. gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
  735. gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
  736. gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
  737. gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
  738. gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
  739. gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
  740. gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
  741. gaia/eval/webapp/node_modules/type-is/README.md +170 -0
  742. gaia/eval/webapp/node_modules/type-is/index.js +266 -0
  743. gaia/eval/webapp/node_modules/type-is/package.json +45 -0
  744. gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
  745. gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
  746. gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
  747. gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
  748. gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
  749. gaia/eval/webapp/node_modules/util/LICENSE +18 -0
  750. gaia/eval/webapp/node_modules/util/README.md +15 -0
  751. gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
  752. gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
  753. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
  754. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
  755. gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
  756. gaia/eval/webapp/node_modules/util/package.json +35 -0
  757. gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
  758. gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
  759. gaia/eval/webapp/node_modules/util/util.js +586 -0
  760. gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
  761. gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
  762. gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
  763. gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
  764. gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
  765. gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
  766. gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
  767. gaia/eval/webapp/node_modules/vary/README.md +101 -0
  768. gaia/eval/webapp/node_modules/vary/index.js +149 -0
  769. gaia/eval/webapp/node_modules/vary/package.json +43 -0
  770. gaia/eval/webapp/package-lock.json +875 -0
  771. gaia/eval/webapp/package.json +21 -0
  772. gaia/eval/webapp/public/app.js +3403 -0
  773. gaia/eval/webapp/public/index.html +88 -0
  774. gaia/eval/webapp/public/styles.css +3661 -0
  775. gaia/eval/webapp/server.js +416 -0
  776. gaia/eval/webapp/test-setup.js +73 -0
  777. gaia/llm/__init__.py +2 -0
  778. gaia/llm/lemonade_client.py +3083 -0
  779. gaia/llm/lemonade_manager.py +269 -0
  780. gaia/llm/llm_client.py +729 -0
  781. gaia/llm/vlm_client.py +307 -0
  782. gaia/logger.py +189 -0
  783. gaia/mcp/agent_mcp_server.py +245 -0
  784. gaia/mcp/blender_mcp_client.py +138 -0
  785. gaia/mcp/blender_mcp_server.py +648 -0
  786. gaia/mcp/context7_cache.py +332 -0
  787. gaia/mcp/external_services.py +518 -0
  788. gaia/mcp/mcp_bridge.py +550 -0
  789. gaia/mcp/servers/__init__.py +6 -0
  790. gaia/mcp/servers/docker_mcp.py +83 -0
  791. gaia/rag/__init__.py +10 -0
  792. gaia/rag/app.py +293 -0
  793. gaia/rag/demo.py +304 -0
  794. gaia/rag/pdf_utils.py +235 -0
  795. gaia/rag/sdk.py +2194 -0
  796. gaia/security.py +163 -0
  797. gaia/talk/app.py +289 -0
  798. gaia/talk/sdk.py +538 -0
  799. gaia/util.py +46 -0
  800. gaia/version.py +100 -0
@@ -0,0 +1,1729 @@
1
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ RAG Tools Mixin for Chat Agent.
5
+
6
+ Provides document retrieval, querying, and evaluation tools.
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import re
12
+ from pathlib import Path
13
+ from typing import Any, Dict
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def extract_page_from_chunk(chunk_text, chunk_index=-1, all_chunks=None):
    """
    Extract a page number from chunk text, or infer it from preceding chunks.

    Strategies, tried in order:
      1. A ``[Page N]`` marker inside ``chunk_text`` (first occurrence).
      2. A ``(Page N)`` marker inside ``chunk_text`` (first occurrence).
      3. Walk backwards over up to four preceding chunks and use the LAST
         ``[Page N]`` marker of the nearest chunk that has one.

    Args:
        chunk_text: The chunk text to extract the page from.
        chunk_index: Global index of this chunk in ``all_chunks``; a negative
            value disables the look-back strategy.
        all_chunks: List of all chunk texts; ``None`` or empty disables the
            look-back strategy.

    Returns:
        Page number as int, or None if no marker was found.
    """
    # Strategies 1 and 2: a marker in this chunk, bracket form first.
    for pattern in (r"\[Page (\d+)\]", r"\(Page (\d+)\)"):
        match = re.search(pattern, chunk_text)
        if match:
            return int(match.group(1))

    # Strategy 3: look backwards for the most recent page marker.
    if chunk_index >= 0 and all_chunks:
        # Exclusive lower bound: inspects chunk_index-1 .. chunk_index-4,
        # clamped so we never go below index 0.
        stop = max(-1, chunk_index - 5)
        for prev_idx in range(chunk_index - 1, stop, -1):
            # chunk_index may be past the end of all_chunks; skip such slots.
            if prev_idx >= len(all_chunks):
                continue
            markers = re.findall(r"\[Page (\d+)\]", all_chunks[prev_idx])
            if markers:
                # A previous chunk may span a page break; the text just before
                # the current chunk belongs to the LAST page marked in it.
                return int(markers[-1])

    return None
50
+
51
+
52
+ class RAGToolsMixin:
53
+ """
54
+ Mixin providing RAG and document query tools.
55
+
56
+ Tools provided:
57
+ - query_documents: Semantic search across all indexed documents
58
+ - query_specific_file: Semantic search in one specific file
59
+ - search_indexed_chunks: Exact text search in RAG indexed chunks (in-memory)
60
+ - evaluate_retrieval: Evaluate if retrieved information is sufficient
61
+ - index_document: Add document to RAG index
62
+ - index_directory: Index all files in a directory
63
+ - list_indexed_documents: List currently indexed documents
64
+ - summarize_document: Generate document summaries
65
+ - rag_status: Get RAG system status
66
+
67
+ Note: File system search tools (search_file, search_directory, search_file_content)
68
+ are provided by FileSearchToolsMixin from gaia.agents.tools.file_tools
69
+ """
70
+
71
+ def register_rag_tools(self) -> None:
72
+ """Register RAG-related tools."""
73
+ from gaia.agents.base.tools import tool
74
+
75
+ @tool(
76
+ name="query_documents",
77
+ description="Query indexed documents using RAG to find relevant information. Returns document chunks that the agent should use to answer the user's question.",
78
+ parameters={
79
+ "query": {
80
+ "type": "str",
81
+ "description": "The question or query to search for in documents",
82
+ "required": True,
83
+ }
84
+ },
85
+ )
86
+ def query_documents(
87
+ query: str, debug: bool = False # pylint: disable=unused-argument
88
+ ) -> Dict[str, Any]:
89
+ """
90
+ Query indexed documents with improved search key generation.
91
+
92
+ Returns chunks for the agent to use in formulating an answer,
93
+ rather than generating the answer directly. This maintains proper
94
+ integration with the agent's conversation flow.
95
+ """
96
+ try:
97
+ # Check if RAG is initialized and has documents
98
+ if not self.rag or not self.rag.index or len(self.rag.chunks) == 0:
99
+ return {
100
+ "status": "no_documents",
101
+ "message": "No documents are indexed. Answer the user's question using your general knowledge.",
102
+ "instruction": (
103
+ "There are no documents indexed to search. "
104
+ "Please answer the user's question using your general knowledge instead. "
105
+ "Do NOT apologize or say you can't help - just answer naturally."
106
+ ),
107
+ }
108
+
109
+ # Generate multiple search keys for better retrieval
110
+ search_keys = self._generate_search_keys(query)
111
+ logger.info(f"Generated {len(search_keys)} search keys for query")
112
+
113
+ # Try each search key and aggregate results
114
+ all_chunks = []
115
+ all_scores = []
116
+
117
+ # Debug information collection
118
+ debug_info = (
119
+ {
120
+ "search_keys": search_keys,
121
+ "embedding_retrieval": [],
122
+ "keyword_retrieval": [],
123
+ "total_chunks_before_dedup": 0,
124
+ "total_chunks_after_dedup": 0,
125
+ }
126
+ if hasattr(self, "debug") and self.debug
127
+ else None
128
+ )
129
+
130
+ # First, use embedding-based retrieval
131
+ for search_key in search_keys:
132
+ try:
133
+ # Use RAG to retrieve chunks
134
+ # pylint: disable=protected-access
135
+ chunks, scores = self.rag._retrieve_chunks(search_key)
136
+ if chunks:
137
+ all_chunks.extend(chunks)
138
+ all_scores.extend(scores)
139
+
140
+ # Capture debug info with full chunk content and indices
141
+ if debug_info:
142
+ # Get global indices for these chunks
143
+ chunk_global_indices = []
144
+ for chunk in chunks[:5]:
145
+ try:
146
+ idx = self.rag.chunks.index(chunk)
147
+ chunk_global_indices.append(idx)
148
+ except ValueError:
149
+ chunk_global_indices.append(-1)
150
+
151
+ debug_info["embedding_retrieval"].append(
152
+ {
153
+ "search_key": search_key,
154
+ "chunks_found": len(chunks),
155
+ "chunk_indices": chunk_global_indices, # Which chunks
156
+ "scores": [
157
+ float(s) for s in scores[:5]
158
+ ], # Top 5 scores
159
+ "top_chunk_preview": (
160
+ chunks[0][:200] if chunks else None
161
+ ),
162
+ "all_chunks": (
163
+ [
164
+ {
165
+ "global_index": (
166
+ chunk_global_indices[i]
167
+ if i < len(chunk_global_indices)
168
+ else -1
169
+ ),
170
+ "content": chunk[
171
+ :500
172
+ ], # First 500 chars
173
+ "score": (
174
+ float(scores[i])
175
+ if i < len(scores)
176
+ else 0
177
+ ),
178
+ "full_length": len(chunk),
179
+ }
180
+ for i, chunk in enumerate(
181
+ chunks[:5]
182
+ ) # Top 5 chunks
183
+ ]
184
+ if chunks
185
+ else []
186
+ ),
187
+ }
188
+ )
189
+ logger.info(
190
+ f"[DEBUG] Embedding search '{search_key}': found {len(chunks)} chunks (indices: {chunk_global_indices})"
191
+ )
192
+ except Exception as e:
193
+ logger.warning(f"Search key '{search_key}' failed: {e}")
194
+ if debug_info:
195
+ debug_info["embedding_retrieval"].append(
196
+ {"search_key": search_key, "error": str(e)}
197
+ )
198
+ continue
199
+
200
+ # HYBRID SEARCH: Boost scores of chunks containing keywords
201
+ # Instead of creating new text snippets, we boost the scores of existing chunks
202
+ query_lower = query.lower()
203
+
204
+ # Identify important terms (not common words)
205
+
206
+ query_words = re.findall(r"\b[a-z]+\b", query_lower)
207
+ stop_words = {
208
+ "the",
209
+ "is",
210
+ "what",
211
+ "of",
212
+ "and",
213
+ "a",
214
+ "an",
215
+ "in",
216
+ "to",
217
+ "for",
218
+ }
219
+ important_terms = [
220
+ w for w in query_words if w not in stop_words and len(w) > 2
221
+ ]
222
+
223
+ keyword_boost_info = []
224
+
225
+ if important_terms:
226
+ # Check each indexed chunk for keyword matches
227
+ for chunk_idx, chunk_text in enumerate(self.rag.chunks):
228
+ chunk_lower = chunk_text.lower()
229
+
230
+ # Count matching terms in this chunk (whole word matching)
231
+ matching_terms = []
232
+ for term in important_terms:
233
+ # Use word boundary regex for whole-word matching
234
+ if re.search(r"\b" + re.escape(term) + r"\b", chunk_lower):
235
+ matching_terms.append(term)
236
+
237
+ if matching_terms:
238
+ # Calculate boost score based on match ratio
239
+ match_ratio = (
240
+ len(matching_terms) / len(important_terms)
241
+ if important_terms
242
+ else 0
243
+ )
244
+ boost_score = 0.6 + (0.2 * match_ratio) # Range: 0.6-0.8
245
+
246
+ # Add this chunk with boosted score if not already in all_chunks
247
+ if chunk_text not in all_chunks:
248
+ all_chunks.append(chunk_text)
249
+ all_scores.append(boost_score)
250
+
251
+ # Get source file for this chunk
252
+ source_file = self.rag.chunk_to_file.get(
253
+ chunk_idx, "Unknown"
254
+ )
255
+
256
+ keyword_boost_info.append(
257
+ {
258
+ "chunk_index": chunk_idx,
259
+ "source_file": (
260
+ Path(source_file).name
261
+ if source_file != "Unknown"
262
+ else "Unknown"
263
+ ),
264
+ "matching_terms": matching_terms,
265
+ "boost_score": boost_score,
266
+ "match_ratio": match_ratio,
267
+ }
268
+ )
269
+
270
+ # Limit boosted chunks
271
+ if len(keyword_boost_info) >= 5:
272
+ break
273
+
274
+ # Capture debug info for keyword boosting
275
+ if debug_info and keyword_boost_info:
276
+ debug_info["keyword_retrieval"].append(
277
+ {
278
+ "chunks_boosted": len(keyword_boost_info),
279
+ "boosted_chunks": keyword_boost_info,
280
+ }
281
+ )
282
+ logger.info(
283
+ f"[DEBUG] Keyword search: boosted {len(keyword_boost_info)} chunks"
284
+ )
285
+
286
+ # Update debug info before deduplication - track which chunks before dedup
287
+ if debug_info:
288
+ debug_info["total_chunks_before_dedup"] = len(all_chunks)
289
+ # Show which chunks were found before deduplication
290
+ all_chunk_indices = []
291
+ for chunk in all_chunks:
292
+ try:
293
+ idx = self.rag.chunks.index(chunk)
294
+ all_chunk_indices.append(idx)
295
+ except ValueError:
296
+ all_chunk_indices.append(
297
+ "keyword_context"
298
+ ) # Keyword match, not a full chunk
299
+ debug_info["chunks_before_dedup_indices"] = all_chunk_indices
300
+ debug_info["deduplication_note"] = (
301
+ "Removes chunks that appear in both embedding and keyword results, keeping the one with higher score"
302
+ )
303
+
304
+ if not all_chunks:
305
+ result = {
306
+ "status": "success",
307
+ "message": "No relevant information found in indexed documents.",
308
+ "chunks": [],
309
+ "num_chunks": 0,
310
+ "relevance_scores": [],
311
+ "instruction": "Inform the user that no relevant information was found in the documents for their query.",
312
+ }
313
+ if debug_info:
314
+ result["debug_info"] = debug_info
315
+ return result
316
+
317
+ # Remove duplicate chunks and keep best scores
318
+ # OPTIMIZED: Use hash-based deduplication instead of full text comparison
319
+ unique_chunks = {} # {chunk_hash: (chunk_text, score)}
320
+
321
+ for chunk, score in zip(all_chunks, all_scores):
322
+ # Use hash for O(1) lookup instead of O(N) string comparison
323
+ chunk_hash = hash(chunk)
324
+
325
+ if (
326
+ chunk_hash not in unique_chunks
327
+ or unique_chunks[chunk_hash][1] < score
328
+ ):
329
+ unique_chunks[chunk_hash] = (chunk, score)
330
+
331
+ # Update debug info after deduplication - track which chunks remain
332
+ if debug_info:
333
+ debug_info["total_chunks_after_dedup"] = len(unique_chunks)
334
+ debug_info["duplicates_removed"] = debug_info[
335
+ "total_chunks_before_dedup"
336
+ ] - len(unique_chunks)
337
+ # Show which chunks remain after deduplication
338
+ dedup_chunk_indices = []
339
+ for chunk_text, score in unique_chunks.values():
340
+ try:
341
+ idx = self.rag.chunks.index(chunk_text)
342
+ dedup_chunk_indices.append(idx)
343
+ except ValueError:
344
+ dedup_chunk_indices.append("keyword_context")
345
+ debug_info["chunks_after_dedup_indices"] = dedup_chunk_indices
346
+
347
+ # Sort by score and take top chunks
348
+ sorted_items = sorted(
349
+ unique_chunks.values(), key=lambda x: x[1], reverse=True
350
+ )
351
+
352
+ # Adaptive max_chunks: use more chunks for larger documents
353
+ # With 32K context, we can afford to retrieve more chunks for better coverage
354
+ total_chunks = len(self.rag.chunks)
355
+ if total_chunks > 200:
356
+ adaptive_max = min(
357
+ 25, self.max_chunks * 5
358
+ ) # Up to 25 chunks for very large docs (200+ pages)
359
+ elif total_chunks > 100:
360
+ adaptive_max = min(
361
+ 20, self.max_chunks * 4
362
+ ) # Up to 20 chunks for large docs (100+ pages)
363
+ elif total_chunks > 50:
364
+ adaptive_max = min(
365
+ 10, self.max_chunks * 2
366
+ ) # Up to 10 chunks for medium docs
367
+ else:
368
+ adaptive_max = self.max_chunks # Default (5) for small docs
369
+
370
+ top_chunks = [chunk for chunk, score in sorted_items[:adaptive_max]]
371
+ top_scores = [score for chunk, score in sorted_items[:adaptive_max]]
372
+
373
+ # Find the actual chunk indices from the RAG system
374
+ chunk_indices = []
375
+ for chunk in top_chunks:
376
+ # Find this chunk's index in the global chunks list
377
+ try:
378
+ idx = self.rag.chunks.index(chunk)
379
+ chunk_indices.append(idx)
380
+ except ValueError:
381
+ chunk_indices.append(-1) # Not found
382
+
383
+ # Format chunks with context markers for better readability
384
+ formatted_chunks = []
385
+ for i, chunk in enumerate(top_chunks):
386
+ formatted_chunks.append(
387
+ {
388
+ "chunk_id": i + 1, # Sequential for display
389
+ "page": extract_page_from_chunk(
390
+ chunk,
391
+ chunk_indices[i] if i < len(chunk_indices) else -1,
392
+ self.rag.chunks,
393
+ ), # PDF page (with lookback)
394
+ "content": chunk,
395
+ "relevance_score": float(top_scores[i]),
396
+ "_debug_chunk_index": (
397
+ chunk_indices[i] if i < len(chunk_indices) else -1
398
+ ), # Internal index (for debugging)
399
+ }
400
+ )
401
+
402
+ # Update debug info with final chunks
403
+ if debug_info:
404
+ debug_info["final_chunks_returned"] = len(top_chunks)
405
+ debug_info["score_distribution"] = {
406
+ "max": float(max(top_scores)) if top_scores else 0,
407
+ "min": float(min(top_scores)) if top_scores else 0,
408
+ "avg": (
409
+ float(sum(top_scores) / len(top_scores))
410
+ if top_scores
411
+ else 0
412
+ ),
413
+ }
414
+ # Add preview of returned chunks
415
+ debug_info["chunks_preview"] = [
416
+ {
417
+ "chunk_id": c["chunk_id"],
418
+ "score": c["relevance_score"],
419
+ "preview": (
420
+ c["content"][:100] + "..."
421
+ if len(c["content"]) > 100
422
+ else c["content"]
423
+ ),
424
+ }
425
+ for c in formatted_chunks[:3] # Show first 3 chunks
426
+ ]
427
+
428
+ # Return chunks for agent to use in answer generation
429
+ result = {
430
+ "status": "success",
431
+ "message": f"Found {len(top_chunks)} relevant document chunks",
432
+ "chunks": formatted_chunks,
433
+ "num_chunks": len(top_chunks),
434
+ "search_keys_used": search_keys,
435
+ "source_files": (
436
+ list(
437
+ set(
438
+ [
439
+ self.rag.chunk_to_file.get(i, "Unknown")
440
+ for i in range(len(self.rag.chunks))
441
+ ]
442
+ )
443
+ )
444
+ if hasattr(self.rag, "chunk_to_file")
445
+ else []
446
+ ),
447
+ "instruction": "Use the provided document chunks to answer the user's question.\n\nCRITICAL CITATION REQUIREMENT:\nYour answer MUST start with: 'According to [document name], page X:' where X is the page number from each chunk's 'page' field.\n\nExample: If chunk has 'page': 2, say 'According to document.pdf, page 2:'\nIf info from pages 2 and 5, say 'According to document.pdf, pages 2 and 5:'",
448
+ }
449
+
450
+ # Add debug info to result if debug mode is enabled
451
+ if debug_info:
452
+ result["debug_info"] = debug_info
453
+ logger.info(
454
+ f"[DEBUG] Query complete: {debug_info['final_chunks_returned']} chunks returned from {debug_info['total_chunks_before_dedup']} total ({debug_info['duplicates_removed']} duplicates removed)"
455
+ )
456
+
457
+ return result
458
+ except Exception as e:
459
+ logger.error(f"Error in query_documents: {e}")
460
+ # Graceful degradation - inform agent to use general knowledge
461
+ return {
462
+ "status": "fallback",
463
+ "message": "Document search is temporarily unavailable",
464
+ "error": str(e),
465
+ "instruction": (
466
+ "The document search system encountered an error. "
467
+ "Please answer the user's question using your general knowledge "
468
+ "and inform them that document search is unavailable."
469
+ ),
470
+ "fallback_response": (
471
+ "I apologize, but I'm currently unable to search the indexed documents. "
472
+ "Let me try to answer your question based on my general knowledge instead."
473
+ ),
474
+ }
475
+
476
@tool(
    name="query_specific_file",
    description="Query a SPECIFIC file by name for targeted, fast retrieval. Use when user mentions a specific file or needs information from one document.",
    parameters={
        "file_path": {
            "type": "str",
            "description": "Name or path of the specific file to query (e.g., 'document.pdf' or full path)",
            "required": True,
        },
        "query": {
            "type": "str",
            "description": "Question to ask about this specific file",
            "required": True,
        },
    },
)
def query_specific_file(file_path: str, query: str) -> Dict[str, Any]:
    """
    Query a single indexed file using embedding retrieval plus keyword matching.

    Steps:
        1. Resolve ``file_path`` against ``self.rag.indexed_files`` by substring
           match. When several files qualify, an exact path match wins, then
           an exact file-name match, then the first candidate.
        2. Run per-file embedding retrieval for every generated search key.
        3. Add chunks of the file that contain important query terms (whole
           word match) with a keyword score between 0.6 and 0.8. At most five
           chunks are added this way; chunks already retrieved keep their
           embedding score.
        4. De-duplicate by chunk text (highest score wins), sort by score and
           keep an adaptive number of top chunks.

    Args:
        file_path: Name, path, or path fragment of the indexed file.
        query: Question to ask about that file.

    Returns:
        A dict whose "status" is "success" (with "chunks", "file" and, when
        chunks were found, "instruction"), "error" when no indexed file
        matches, or "fallback" when an unexpected exception occurs.
        "debug_info" is attached when ``self.debug`` is truthy.
    """
    try:
        # Debug information is only collected when the agent runs in debug mode
        debug_info = (
            {
                "tool": "query_specific_file",
                "file_path": file_path,
                "query": query,
                "search_keys": [],
                "embedding_retrieval": [],
                "keyword_retrieval": [],
                "total_chunks_before_dedup": 0,
                "total_chunks_after_dedup": 0,
            }
            if getattr(self, "debug", False)
            else None
        )

        # Find the file in indexed files (substring match on the stored path)
        matching_files = [f for f in self.rag.indexed_files if file_path in str(f)]

        if not matching_files:
            return {
                "status": "error",
                "error": f"File '{file_path}' not found in indexed documents. Use search_files to find it first.",
            }

        # Prefer an exact path, then an exact file name; min() returns the first
        # best candidate, so ties fall back to the first match.
        # TODO: Let user disambiguate if multiple matches
        requested_name = Path(file_path).name
        target_file = min(
            matching_files,
            key=lambda f: (
                str(f) != file_path,
                Path(str(f)).name != requested_name,
            ),
        )
        target_key = str(target_file)
        file_name = Path(target_file).name

        # Generate search keys for better retrieval
        search_keys = self._generate_search_keys(query)

        if debug_info:
            debug_info["search_keys"] = search_keys
            debug_info["target_file"] = target_key
            logger.info(
                f"[DEBUG] query_specific_file: Searching '{file_name}' with {len(search_keys)} search keys"
            )

        # Map chunk text -> first global index, built once on first use so the
        # many index lookups below do not each rescan self.rag.chunks.
        chunk_positions: Dict[str, int] = {}

        def _global_index(text, default=-1):
            """Return the first position of ``text`` in self.rag.chunks, or ``default``."""
            if not chunk_positions:
                for position, known_chunk in enumerate(self.rag.chunks):
                    chunk_positions.setdefault(known_chunk, position)
            return chunk_positions.get(text, default)

        all_chunks = []
        all_scores = []

        # First, do embedding-based retrieval, one pass per search key
        for search_key in search_keys:
            try:
                # pylint: disable=protected-access
                chunks, scores = self.rag._retrieve_chunks_from_file(
                    search_key, target_key
                )
                if chunks:
                    all_chunks.extend(chunks)
                    all_scores.extend(scores)

                # Capture debug info for the top 5 chunks of this search key;
                # every field degrades to an empty value when nothing was found.
                if debug_info:
                    preview_chunks = chunks[:5] if chunks else []
                    chunk_global_indices = [
                        _global_index(chunk) for chunk in preview_chunks
                    ]
                    debug_info["embedding_retrieval"].append(
                        {
                            "search_key": search_key,
                            "chunks_found": len(chunks) if chunks else 0,
                            "chunk_indices": chunk_global_indices,
                            "scores": (
                                [float(s) for s in scores[:5]] if chunks else []
                            ),
                            "top_chunk_preview": (
                                chunks[0][:100] if chunks else None
                            ),
                            "all_chunks": [
                                {
                                    "global_index": chunk_global_indices[i],
                                    "content": chunk[:500],  # First 500 chars
                                    "score": (
                                        float(scores[i]) if i < len(scores) else 0
                                    ),
                                    "full_length": len(chunk),
                                }
                                for i, chunk in enumerate(preview_chunks)
                            ],
                        }
                    )
            except Exception as e:
                # A failing search key must not abort the remaining keys
                logger.warning(f"Search key '{search_key}' failed: {e}")
                if debug_info:
                    debug_info["embedding_retrieval"].append(
                        {"search_key": search_key, "error": str(e)}
                    )

        # HYBRID SEARCH: add chunks of this file that contain query keywords.
        # Only runs when the file has cached full text in its metadata.
        file_metadata = self.rag.file_metadata
        if target_key in file_metadata and "full_text" in file_metadata[target_key]:
            # Identify important terms (not common words, longer than 2 chars)
            query_words = re.findall(r"\b[a-z]+\b", query.lower())
            stop_words = {
                "the",
                "is",
                "what",
                "of",
                "and",
                "a",
                "an",
                "in",
                "to",
                "for",
            }
            important_terms = [
                w for w in query_words if w not in stop_words and len(w) > 2
            ]

            if important_terms:
                # Compile the whole-word patterns once instead of per chunk
                term_patterns = [
                    (term, re.compile(r"\b" + re.escape(term) + r"\b"))
                    for term in important_terms
                ]
                seen_chunks = set(all_chunks)  # O(1) "already retrieved" test
                file_keyword_info = []

                file_chunk_indices = self.rag.file_to_chunk_indices.get(
                    target_key, []
                )
                for chunk_idx in file_chunk_indices:
                    if chunk_idx >= len(self.rag.chunks):
                        continue  # Stale index, nothing to inspect

                    chunk_content = self.rag.chunks[chunk_idx]
                    chunk_text = chunk_content.lower()
                    matching_terms = [
                        term
                        for term, pattern in term_patterns
                        if pattern.search(chunk_text)
                    ]
                    if not matching_terms:
                        continue

                    # Score grows with the share of important terms matched
                    match_ratio = len(matching_terms) / len(important_terms)
                    boost_score = 0.6 + (0.2 * match_ratio)  # Range: 0.6-0.8

                    # Only chunks not already retrieved are added
                    if chunk_content not in seen_chunks:
                        seen_chunks.add(chunk_content)
                        all_chunks.append(chunk_content)
                        all_scores.append(boost_score)
                        file_keyword_info.append(
                            {
                                "chunk_index": chunk_idx,
                                "matching_terms": matching_terms,
                                "boost_score": boost_score,
                                "match_ratio": match_ratio,
                            }
                        )

                    # Limit boosted chunks
                    if len(file_keyword_info) >= 5:
                        break

                # Capture debug info for keyword search
                if debug_info and file_keyword_info:
                    debug_info["keyword_retrieval"].append(
                        {
                            "file": file_name,
                            "chunks_boosted": len(file_keyword_info),
                            "boosted_chunks": file_keyword_info[:5],
                        }
                    )
                    logger.info(
                        f"[DEBUG] Keyword search in {file_name}: boosted {len(file_keyword_info)} chunks"
                    )

        # Record which chunks were collected before deduplication
        if debug_info:
            debug_info["total_chunks_before_dedup"] = len(all_chunks)
            debug_info["chunks_before_dedup_indices"] = [
                _global_index(chunk, "keyword_context") for chunk in all_chunks
            ]
            debug_info["deduplication_note"] = (
                "Removes duplicate chunks found by both embedding and keyword search, keeping the version with higher score"
            )

        if not all_chunks:
            result = {
                "status": "success",
                "message": f"No relevant information found in {file_name}",
                "chunks": [],
                "file": target_key,
            }
            if debug_info:
                result["debug_info"] = debug_info
            return result

        # De-duplicate on the chunk text itself (not hash(chunk)) so that two
        # different chunks can never be merged by a hash collision.
        best_scores = {}  # {chunk_text: highest score seen}
        for chunk, score in zip(all_chunks, all_scores):
            if chunk not in best_scores or best_scores[chunk] < score:
                best_scores[chunk] = score

        # Record which chunks remain after deduplication
        if debug_info:
            debug_info["total_chunks_after_dedup"] = len(best_scores)
            debug_info["duplicates_removed"] = debug_info[
                "total_chunks_before_dedup"
            ] - len(best_scores)
            debug_info["chunks_after_dedup_indices"] = [
                _global_index(chunk_text, "keyword_context")
                for chunk_text in best_scores
            ]

        sorted_items = sorted(
            best_scores.items(), key=lambda item: item[1], reverse=True
        )

        # Adaptive max_chunks: allow more chunks when the index holds more
        # chunks. Each tier is (chunk-count threshold, hard cap, multiplier).
        total_chunks = len(self.rag.chunks)
        adaptive_max = self.max_chunks
        for threshold, cap, multiplier in ((200, 25, 5), (100, 20, 4), (50, 10, 2)):
            if total_chunks > threshold:
                adaptive_max = min(cap, self.max_chunks * multiplier)
                break

        top_items = sorted_items[:adaptive_max]
        top_chunks = [chunk for chunk, _ in top_items]
        top_scores = [score for _, score in top_items]

        # Update debug info with final chunks
        if debug_info:
            debug_info["final_chunks_returned"] = len(top_chunks)
            debug_info["score_distribution"] = {
                "max": float(max(top_scores)) if top_scores else 0,
                "min": float(min(top_scores)) if top_scores else 0,
                "avg": (
                    float(sum(top_scores) / len(top_scores)) if top_scores else 0
                ),
            }
            logger.info(
                f"[DEBUG] query_specific_file complete: {debug_info['final_chunks_returned']} chunks returned from {debug_info['total_chunks_before_dedup']} total"
            )

        formatted_chunks = []
        for display_id, (chunk, score) in enumerate(top_items, start=1):
            global_idx = _global_index(chunk)  # -1 when not in the index
            formatted_chunks.append(
                {
                    "chunk_id": display_id,  # Sequential for display
                    "page": extract_page_from_chunk(
                        chunk, global_idx, self.rag.chunks
                    ),  # PDF page (with lookback)
                    "content": chunk,
                    "relevance_score": float(score),
                    "_debug_chunk_index": global_idx,  # Internal index (for debugging)
                }
            )

        result = {
            "status": "success",
            "message": f"Found {len(top_chunks)} relevant chunks in {file_name}",
            "chunks": formatted_chunks,
            "file": target_key,
            "instruction": f"Use these chunks from {file_name} to answer the question. Read through ALL {len(top_chunks)} chunks completely before answering.\n\nCRITICAL CITATION REQUIREMENT:\nYour answer MUST start with: 'According to {file_name}, page X:' where X is the page number from the chunk's 'page' field.\n\nExample: If chunk has 'page': 2, say 'According to {file_name}, page 2:'\nIf info from multiple pages, say 'According to {file_name}, pages 2 and 5:'",
        }

        # Add debug info to result if debug mode is enabled
        if debug_info:
            result["debug_info"] = debug_info

        return result

    except Exception as e:
        logger.error(f"Error in query_specific_file: {e}")
        # Graceful degradation
        return {
            "status": "fallback",
            "message": f"Unable to search in {file_path}",
            "error": str(e),
            "instruction": (
                f"Could not search in the specific file '{file_path}'. "
                "Inform the user about this issue and offer to help with general knowledge."
            ),
            "fallback_response": (
                f"I encountered an error while trying to search in '{file_path}'. "
                "The file might not be properly indexed or there was a technical issue. "
                "Would you like me to try answering based on general knowledge instead?"
            ),
        }
866
+
867
@tool(
    name="search_indexed_chunks",
    description="Search for exact text patterns within RAG-indexed document chunks. Use for finding specific phrases in indexed documents.",
    parameters={
        "pattern": {
            "type": "str",
            "description": "Text pattern or keyword to search for",
            "required": True,
        },
    },
)
def search_indexed_chunks(pattern: str) -> Dict[str, Any]:
    """
    Case-insensitive substring search over the in-memory RAG chunks.

    Searches ``self.rag.chunks`` only; files on disk are not read.

    Args:
        pattern: Literal text to look for (not a regular expression).

    Returns:
        A dict with "status" "success" plus "matches" (at most 10 chunks),
        "count" (total number of matching chunks) and, when there are
        matches, "showing" and "instruction". "status" is "error" when
        nothing is indexed, the pattern is empty, or the search raised.
        "debug_info" is attached to success results when ``self.debug`` is
        truthy.
    """
    try:
        # Debug information is only collected when the agent runs in debug mode
        debug_info = (
            {
                "tool": "search_indexed_chunks",
                "pattern": pattern,
                "total_chunks_searched": 0,
                "matches_found": 0,
                "chunks_with_matches": [],
            }
            if getattr(self, "debug", False)
            else None
        )

        if not self.rag.chunks:
            return {"status": "error", "error": "No documents indexed."}

        # An empty string is a substring of every chunk, so an empty pattern
        # would report arbitrary chunks as "matches". Reject it explicitly.
        if not pattern or not pattern.strip():
            return {
                "status": "error",
                "error": "Search pattern is empty. Provide a word or phrase to search for.",
            }

        pattern_lower = pattern.lower()

        if debug_info:
            debug_info["total_chunks_searched"] = len(self.rag.chunks)
            logger.info(
                f"[DEBUG] search_indexed_chunks: Searching for '{pattern}' in {len(self.rag.chunks)} chunks"
            )

        matching_chunks = []
        for i, chunk in enumerate(self.rag.chunks):
            if pattern_lower not in chunk.lower():
                continue
            matching_chunks.append(chunk)

            # Capture debug info for the first 5 matching chunks only
            if debug_info and len(debug_info["chunks_with_matches"]) < 5:
                matching_lines = [
                    line
                    for line in chunk.split("\n")
                    if pattern_lower in line.lower()
                ]
                debug_info["chunks_with_matches"].append(
                    {
                        "chunk_index": i,
                        "chunk_preview": (
                            chunk[:100] + "..." if len(chunk) > 100 else chunk
                        ),
                        "matching_lines": matching_lines[:2],  # First 2 matching lines
                    }
                )

        if debug_info:
            debug_info["matches_found"] = len(matching_chunks)
            logger.info(
                f"[DEBUG] search_indexed_chunks complete: Found {len(matching_chunks)} matches"
            )

        if not matching_chunks:
            result = {
                "status": "success",
                "message": f"Pattern '{pattern}' not found in indexed documents",
                "matches": [],
                "count": 0,
            }
        else:
            # Limit results so the tool response stays small
            limited_matches = matching_chunks[:10]
            result = {
                "status": "success",
                "message": f"Found {len(matching_chunks)} matches for '{pattern}'",
                "matches": limited_matches,
                "count": len(matching_chunks),
                "showing": len(limited_matches),
                "instruction": "Use these exact matches to answer the user's question.",
            }

        # Add debug info to result if debug mode is enabled
        if debug_info:
            result["debug_info"] = debug_info

        return result

    except Exception as e:
        logger.error(f"Error in search_indexed_chunks: {e}")
        # Consistent error handling with graceful degradation
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "search_indexed_chunks",
            "hint": "The text search failed. Try using query_documents for semantic search instead.",
        }
982
+
983
+ # NOTE: search_file_content (disk-based grep) and write_file are now
984
+ # provided by FileSearchToolsMixin from gaia.agents.tools.file_tools
985
+
986
@tool(
    name="evaluate_retrieval",
    description="Evaluate if retrieved information is sufficient to answer the question. Use before providing final answer.",
    parameters={
        "question": {
            "type": "str",
            "description": "The original question",
            "required": True,
        },
        "retrieved_info": {
            "type": "str",
            "description": "Summary of information retrieved so far",
            "required": True,
        },
    },
)
def evaluate_retrieval(question: str, retrieved_info: str) -> Dict[str, Any]:
    """
    Heuristically judge whether retrieved information answers the question.

    The information counts as sufficient when it is longer than 50
    characters (after stripping) and more than 30% of the distinct
    whitespace-separated, lower-cased question words also occur in it.

    Args:
        question: The original question.
        retrieved_info: Summary of the information retrieved so far.

    Returns:
        A dict with "status", "sufficient", "confidence", "recommendation"
        and "keyword_overlap" (rounded to 2 decimals). When insufficient,
        "issues" lists only the checks that actually failed. On an
        unexpected exception an error dict is returned instead.
    """
    try:
        # Simple heuristic evaluation
        # In production, this could use LLM or more sophisticated metrics
        has_content = len(retrieved_info.strip()) > 50

        # Share of distinct question words that appear in the retrieved info;
        # max(..., 1) avoids dividing by zero for an empty question.
        question_words = set(question.lower().split())
        info_words = set(retrieved_info.lower().split())
        keyword_overlap = len(question_words & info_words) / max(
            len(question_words), 1
        )
        has_overlap = keyword_overlap > 0.3

        if has_content and has_overlap:
            return {
                "status": "success",
                "sufficient": True,
                "confidence": "high" if keyword_overlap > 0.5 else "medium",
                "recommendation": "Provide answer based on retrieved information",
                "keyword_overlap": round(keyword_overlap, 2),
            }

        # Report exactly the checks that failed. Both the verdict and the
        # issue list use the same flags, so an insufficient result always
        # names at least one issue and never contains None placeholders.
        issues = []
        if not has_content:
            issues.append("Low information content")
        if not has_overlap:
            issues.append("Low keyword overlap")

        return {
            "status": "success",
            "sufficient": False,
            "confidence": "low",
            "recommendation": "Try query_specific_file for targeted search or search_file_content for exact matches",
            "keyword_overlap": round(keyword_overlap, 2),
            "issues": issues,
        }

    except Exception as e:
        logger.error(f"Error in evaluate_retrieval: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "evaluate_retrieval",
            "hint": "Failed to evaluate retrieval quality. You can proceed with answering based on retrieved chunks.",
        }
1054
+
1055
@tool(
    name="index_document",
    description="Add a document to the RAG index",
    parameters={
        "file_path": {
            "type": "str",
            "description": "Path to the document (PDF) to index",
            "required": True,
        }
    },
)
def index_document(file_path: str) -> Dict[str, Any]:
    """
    Index one document after checking that it exists and may be accessed.

    On success the path is remembered in ``self.indexed_files``, appended to
    the current session (which is then saved) if it is not already listed
    there, and the system prompt is refreshed when the agent provides
    ``_update_system_prompt``.

    Returns:
        A "success" dict carrying the statistics reported by
        ``self.rag.index_document``, or an "error" dict describing why the
        document could not be indexed.
    """
    try:
        if not os.path.exists(file_path):
            return {"status": "error", "error": f"File not found: {file_path}"}

        # The session manager decides whether this path may be indexed
        allowed = self.session_manager.validate_path(file_path, operation="index")
        if not allowed:
            return {"status": "error", "error": f"Access denied: {file_path}"}

        # The RAG SDK returns a dict of statistics for the indexed document
        outcome = self.rag.index_document(file_path)

        if not outcome.get("success"):
            return {
                "status": "error",
                "error": outcome.get("error", f"Failed to index: {file_path}"),
                "file_name": outcome.get("file_name", Path(file_path).name),
            }

        self.indexed_files.add(file_path)

        # Record the document in the active session, persisting only on change
        session = self.current_session
        if session and file_path not in session.indexed_documents:
            session.indexed_documents.append(file_path)
            self.session_manager.save_session(session)

        # Refresh the system prompt so it mentions the new document
        if hasattr(self, "_update_system_prompt"):
            self._update_system_prompt()

        # Assemble the response from the statistics reported by the RAG SDK
        response = {
            "status": "success",
            "message": f"Successfully indexed: {outcome.get('file_name', file_path)}",
        }
        for stat_name in (
            "file_name",
            "file_type",
            "file_size_mb",
            "num_pages",
            "num_chunks",
            "total_indexed_files",
            "total_chunks",
        ):
            response[stat_name] = outcome.get(stat_name)
        for flag_name in ("from_cache", "already_indexed", "reindexed"):
            response[flag_name] = outcome.get(flag_name, False)
        return response
    except Exception as e:
        logger.error(f"Error indexing document: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "index_document",
            "file": file_path,
            "hint": "Failed to index document. Check if file exists and is readable.",
        }
1124
+
1125
@tool(
    name="list_indexed_documents",
    description="List all currently indexed documents",
    parameters={},
)
def list_indexed_documents() -> Dict[str, Any]:
    """
    Report the file names of all indexed documents and the total chunk count.

    Returns:
        A "success" dict with "documents" (file names without directories),
        "count" and "total_chunks", or an "error" dict if the lookup raised.
    """
    try:
        indexed_paths = list(self.rag.indexed_files)

        # Expose only the final path component of each indexed file
        document_names = []
        for indexed_path in indexed_paths:
            document_names.append(str(Path(indexed_path).name))

        return {
            "status": "success",
            "documents": document_names,
            "count": len(indexed_paths),
            "total_chunks": len(self.rag.chunks),
        }
    except Exception as e:
        logger.error(f"Error in list_indexed_documents: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "list_indexed_documents",
        }
1148
+
1149
@tool(
    name="rag_status",
    description="Get the status of the RAG system",
    parameters={},
)
def rag_status() -> Dict[str, Any]:
    """
    Return the RAG status merged with the agent's watched directories.

    Keys reported by ``self.rag.get_status()`` are merged after the default
    "status": "success" entry, so a "status" key from the RAG system takes
    precedence. "watched_directories" is always set from
    ``self.watch_directories``.
    """
    try:
        rag_state = self.rag.get_status()
        report = {"status": "success", **rag_state}
        report["watched_directories"] = self.watch_directories
        return report
    except Exception as e:
        logger.error(f"Error in rag_status: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "rag_status",
        }
1171
+
1172
+ @tool(
1173
+ name="summarize_document",
1174
+ description="Generate a comprehensive summary of a large indexed document by iterating through its content in sections. Best for getting an overview of lengthy documents.",
1175
+ parameters={
1176
+ "file_path": {
1177
+ "type": "str",
1178
+ "description": "Name or path of the document to summarize",
1179
+ "required": True,
1180
+ },
1181
+ "summary_type": {
1182
+ "type": "str",
1183
+ "description": "Type of summary: 'brief' (2-3 paragraphs), 'detailed' (comprehensive with all key points), 'bullets' (key points as bullets) - default: 'detailed'",
1184
+ "required": False,
1185
+ },
1186
+ "max_words_per_section": {
1187
+ "type": "int",
1188
+ "description": "Maximum words to process per section (default: 20000). Larger documents will be split into multiple sections and summarized iteratively.",
1189
+ "required": False,
1190
+ },
1191
+ },
1192
+ )
1193
+ def summarize_document(
1194
+ file_path: str,
1195
+ summary_type: str = "detailed",
1196
+ max_words_per_section: int = 20000,
1197
+ ) -> Dict[str, Any]:
1198
+ """
1199
+ Summarize a large document by iterating through its content.
1200
+
1201
+ For large documents, this will:
1202
+ 1. Get the full text from cache (already extracted with VLM)
1203
+ 2. Split into manageable sections based on word count
1204
+ 3. Summarize each section with the LLM
1205
+ 4. Combine section summaries into a final comprehensive summary
1206
+ """
1207
+ try:
1208
+
1209
+ # Find the file in indexed files
1210
+ matching_files = [
1211
+ f for f in self.rag.indexed_files if file_path in str(f)
1212
+ ]
1213
+
1214
+ if not matching_files:
1215
+ return {
1216
+ "status": "error",
1217
+ "error": f"Document '{file_path}' not found in indexed documents. Use index_document first.",
1218
+ }
1219
+
1220
+ target_file = matching_files[0]
1221
+
1222
+ # Validate summary type
1223
+ valid_types = ["brief", "detailed", "bullets"]
1224
+ if summary_type not in valid_types:
1225
+ return {
1226
+ "status": "error",
1227
+ "error": f"Invalid summary_type '{summary_type}'. Valid types: {', '.join(valid_types)}",
1228
+ }
1229
+
1230
+ # Get type-specific instruction
1231
+ type_instructions = {
1232
+ "brief": "Create a concise 2-3 paragraph summary highlighting the most important points and main themes.",
1233
+ "detailed": "Create a comprehensive summary covering all major topics, key points, and important details. Organize by sections if applicable.",
1234
+ "bullets": "Create a bullet-point summary of the key points, organizing related items together. Use sub-bullets for details.",
1235
+ }
1236
+
1237
+ summary_instruction = type_instructions[summary_type]
1238
+
1239
+ # Get all chunks from the RAG index
1240
+ # Since we can't directly filter chunks by document, we'll use a workaround:
1241
+ # Extract text from the original PDF and chunk it
1242
+
1243
+ logger.info(f"Summarizing document: {target_file}")
1244
+
1245
+ # Use cached extracted text if available, otherwise extract
1246
+ try:
1247
+ # Check if we have cached metadata with full_text
1248
+ if (
1249
+ target_file in self.rag.file_metadata
1250
+ and "full_text" in self.rag.file_metadata[target_file]
1251
+ ):
1252
+ # Use cached text - no need to re-run VLM or extraction!
1253
+ full_text = self.rag.file_metadata[target_file]["full_text"]
1254
+ logger.debug(
1255
+ f"Using cached extracted text for {Path(target_file).name}"
1256
+ )
1257
+ else:
1258
+ # Fallback: Extract text using RAG SDK's file extraction
1259
+ logger.warning(
1260
+ f"No cached text found for {Path(target_file).name}, extracting..."
1261
+ )
1262
+ # pylint: disable=protected-access
1263
+ full_text, _ = self.rag._extract_text_from_file(target_file)
1264
+
1265
+ if not full_text or not full_text.strip():
1266
+ return {
1267
+ "status": "error",
1268
+ "error": f"No text could be extracted from {Path(target_file).name}",
1269
+ }
1270
+
1271
+ except Exception as e:
1272
+ return {
1273
+ "status": "error",
1274
+ "error": f"Failed to extract text from document: {e}",
1275
+ }
1276
+
1277
+ # Split text into sections based on page boundaries
1278
+ # This is the simplest and most reliable semantic boundary
1279
+
1280
+ # Split by page markers while keeping the markers
1281
+ page_sections = re.split(r"(\[Page \d+\])", full_text)
1282
+
1283
+ # Recombine into complete pages
1284
+ pages = []
1285
+ current_page = ""
1286
+
1287
+ for part in page_sections:
1288
+ if re.match(r"\[Page \d+\]", part):
1289
+ # This is a page marker
1290
+ if current_page.strip():
1291
+ pages.append(current_page.strip())
1292
+ current_page = part + "\n"
1293
+ else:
1294
+ current_page += part
1295
+
1296
+ # Add last page
1297
+ if current_page.strip():
1298
+ pages.append(current_page.strip())
1299
+
1300
+ # Group pages into sections that fit within max_words_per_section
1301
+ # Include overlap: last page of previous section is included in next section
1302
+ sections = []
1303
+ current_section_pages = []
1304
+ current_word_count = 0
1305
+ overlap_pages = 1 # Number of pages to overlap between sections
1306
+
1307
+ for _page_idx, page in enumerate(pages):
1308
+ page_words = len(page.split())
1309
+
1310
+ if (
1311
+ current_word_count + page_words > max_words_per_section
1312
+ and current_section_pages
1313
+ ):
1314
+ # Would exceed limit, save current section and start new with overlap
1315
+ sections.append("\n\n".join(current_section_pages))
1316
+
1317
+ # Start new section with overlap (include last N pages from previous section)
1318
+ overlap_start = max(
1319
+ 0, len(current_section_pages) - overlap_pages
1320
+ )
1321
+ current_section_pages = current_section_pages[overlap_start:]
1322
+ current_word_count = sum(
1323
+ len(p.split()) for p in current_section_pages
1324
+ )
1325
+
1326
+ # Add page to current section
1327
+ current_section_pages.append(page)
1328
+ current_word_count += page_words
1329
+
1330
+ # Add last section
1331
+ if current_section_pages:
1332
+ sections.append("\n\n".join(current_section_pages))
1333
+
1334
+ total_words = len(full_text.split())
1335
+ logger.info(
1336
+ f"Document has {total_words} words, {len(pages)} pages, grouped into {len(sections)} sections"
1337
+ )
1338
+
1339
+ # Get document metadata for enhanced summary
1340
+ file_metadata = self.rag.file_metadata.get(target_file, {})
1341
+ num_pages = file_metadata.get("num_pages", len(pages))
1342
+ _vlm_pages = file_metadata.get("vlm_pages", 0)
1343
+
1344
+ # If document is small enough (single section), summarize in one pass
1345
+ if len(sections) <= 1:
1346
+ prompt = f"""{summary_instruction}
1347
+
1348
+ Document to summarize: {Path(target_file).name}
1349
+
1350
+ Document content:
1351
+ {full_text}
1352
+
1353
+ Generate a well-structured summary with the following format:
1354
+
1355
+ # Document Summary: {Path(target_file).name}
1356
+
1357
+ ## Document Information
1358
+ - **File**: {Path(target_file).name}
1359
+ - **Pages**: {num_pages}
1360
+ - **Total Words**: ~{total_words:,}
1361
+
1362
+ ## Overview
1363
+ [2-3 sentence overview of what this document is]
1364
+
1365
+ ## Key Content
1366
+ [Main content organized by topics/sections - reference page numbers where applicable]
1367
+
1368
+ ## Key Takeaways
1369
+ [Bullet points of the most important points]
1370
+
1371
+ Use the {summary_type} style for the content sections."""
1372
+
1373
+ # Use chat SDK to generate summary
1374
+ try:
1375
+ # Use RAG's chat SDK for summary generation
1376
+ response = self.rag.chat.send(prompt)
1377
+ summary_text = response.text
1378
+
1379
+ return {
1380
+ "status": "success",
1381
+ "summary": summary_text,
1382
+ "summary_type": summary_type,
1383
+ "document": str(Path(target_file).name),
1384
+ "total_words": total_words,
1385
+ "sections_processed": 1,
1386
+ "instruction": "Present the summary to the user. The summary includes document metadata, structured sections, and page references.",
1387
+ }
1388
+ except Exception as e:
1389
+ logger.error(f"Error generating summary: {e}")
1390
+ return {
1391
+ "status": "error",
1392
+ "error": f"Failed to generate summary: {e}",
1393
+ }
1394
+
1395
+ # For long documents, iterate over sections (preserving semantic boundaries)
1396
+ section_summaries = []
1397
+ num_sections = len(sections)
1398
+
1399
+ logger.info(f"Processing {num_sections} sections for summarization")
1400
+
1401
+ for section_num, section_text in enumerate(sections, 1):
1402
+ logger.info(
1403
+ f"Summarizing section {section_num}/{num_sections} ({len(section_text.split())} words)"
1404
+ )
1405
+
1406
+ # Generate summary for this section
1407
+ section_prompt = f"""This is section {section_num} of {num_sections} from the document.
1408
+ {summary_instruction}
1409
+
1410
+ Section content:
1411
+ {section_text}
1412
+
1413
+ Generate a summary of this section:"""
1414
+
1415
+ try:
1416
+ # Use RAG's chat SDK for section summary
1417
+ response = self.rag.chat.send(section_prompt)
1418
+ segment_summary = response.text
1419
+
1420
+ section_summaries.append(
1421
+ {"section": section_num, "summary": segment_summary}
1422
+ )
1423
+ except Exception as e:
1424
+ logger.warning(
1425
+ f"Failed to summarize segment {section_num}: {e}"
1426
+ )
1427
+ continue
1428
+
1429
+ # Combine section summaries into final summary
1430
+ if not section_summaries:
1431
+ return {
1432
+ "status": "error",
1433
+ "error": "Failed to generate any section summaries",
1434
+ }
1435
+
1436
+ # Final synthesis prompt with structured format
1437
+ combined_text = "\n\n".join(
1438
+ [
1439
+ f"Section {s['section']} Summary:\n{s['summary']}"
1440
+ for s in section_summaries
1441
+ ]
1442
+ )
1443
+
1444
+ final_prompt = f"""You have summaries of {len(section_summaries)} sections from the document: {Path(target_file).name}
1445
+
1446
+ Section summaries:
1447
+ {combined_text}
1448
+
1449
+ Synthesize these into a single, well-structured summary using this format:
1450
+
1451
+ # Document Summary: {Path(target_file).name}
1452
+
1453
+ ## Document Information
1454
+ - **File**: {Path(target_file).name}
1455
+ - **Pages**: {num_pages}
1456
+ - **Total Words**: ~{total_words:,}
1457
+ - **Sections Processed**: {len(section_summaries)}
1458
+
1459
+ ## Overview
1460
+ [2-3 sentence overview synthesizing all sections]
1461
+
1462
+ ## Key Content
1463
+ [Main content organized by topics - consolidate from all section summaries, reference page numbers]
1464
+
1465
+ ## Key Takeaways
1466
+ [Bullet points of the most important points from across all sections]
1467
+
1468
+ Use the {summary_type} style. Ensure page references from section summaries are preserved."""
1469
+
1470
+ try:
1471
+ # Use RAG's chat SDK for final summary synthesis
1472
+ response = self.rag.chat.send(final_prompt)
1473
+ final_summary = response.text
1474
+
1475
+ return {
1476
+ "status": "success",
1477
+ "summary": final_summary,
1478
+ "summary_type": summary_type,
1479
+ "document": str(Path(target_file).name),
1480
+ "total_words": total_words,
1481
+ "sections_processed": len(section_summaries),
1482
+ "section_summaries": section_summaries,
1483
+ "instruction": "Present the formatted summary to the user. The summary includes document metadata, organized sections with page references, and key takeaways.",
1484
+ }
1485
+ except Exception as e:
1486
+ logger.error(f"Error synthesizing final summary: {e}")
1487
+ # Return segment summaries as fallback
1488
+ return {
1489
+ "status": "partial",
1490
+ "message": "Could not synthesize final summary, returning segment summaries",
1491
+ "summary_style": summary_type,
1492
+ "document": str(Path(target_file).name),
1493
+ "total_words": total_words,
1494
+ "iterations": len(section_summaries),
1495
+ "segment_summaries": section_summaries,
1496
+ }
1497
+
1498
+ except Exception as e:
1499
+ logger.error(f"Error in summarize_document: {e}")
1500
+ return {
1501
+ "status": "error",
1502
+ "error": str(e),
1503
+ "has_errors": True,
1504
+ "operation": "summarize_document",
1505
+ "file": target_file,
1506
+ "hint": "Failed to generate summary. Try using query_documents to get specific information instead.",
1507
+ }
1508
+
1509
+ # NOTE: search_file and search_directory tools are now provided by
1510
+ # FileSearchToolsMixin from gaia.agents.tools.file_tools
1511
+ # This provides shared file search functionality across all agents
1512
+
1513
@tool(
    name="dump_document",
    description="Export the cached extracted text from an indexed document to a markdown file. Useful for reviewing extracted content or debugging.",
    parameters={
        "file_name": {
            "type": "str",
            "description": "Name or path of the indexed document to dump",
            "required": True,
        },
        "output_path": {
            "type": "str",
            "description": "Output path for the markdown file (optional, defaults to {document stem}_extracted.md in the RAG cache directory)",
            "required": False,
        },
    },
)
def dump_document(file_name: str, output_path: str = None) -> Dict[str, Any]:
    """
    Export cached extracted text from an indexed document.

    This uses the cached full_text from file_metadata, avoiding re-extraction.

    Args:
        file_name: Name or path (or a fragment of either) of an indexed
            document. Exact path / file-name matches are preferred; a
            substring match is used only as a fallback.
        output_path: Destination markdown file. When omitted, the file is
            written to ``<cache_dir>/<document stem>_extracted.md``.

    Returns:
        A dict with ``status`` set to ``"success"`` (plus ``output_path``,
        ``text_length``, ``num_pages``, ``vlm_pages`` and ``message``) or
        ``"error"`` (plus ``error`` and, where available, ``hint``).
    """
    try:
        # Find the file in indexed files (substring match on the stored path)
        matching_files = [
            f for f in self.rag.indexed_files if file_name in str(f)
        ]

        if not matching_files:
            return {
                "status": "error",
                "error": f"Document '{file_name}' not found in indexed documents.",
                "hint": "Use list_indexed_documents to see available documents.",
            }

        # Prefer exact matches so that e.g. "report.pdf" does not resolve to
        # "old_report.pdf", then sort so the choice does not depend on the
        # iteration order of the indexed-files container.
        exact_matches = [
            f
            for f in matching_files
            if str(f) == file_name or Path(str(f)).name == file_name
        ]
        target_file = sorted(exact_matches or matching_files, key=str)[0]

        # Get cached text from metadata
        if target_file not in self.rag.file_metadata:
            return {
                "status": "error",
                "error": f"No cached metadata found for {Path(target_file).name}",
                "hint": "Document may need to be re-indexed.",
            }

        metadata = self.rag.file_metadata[target_file]
        full_text = metadata.get("full_text", "")

        if not full_text:
            return {
                "status": "error",
                "error": f"No extracted text found in cache for {Path(target_file).name}",
            }

        # Determine output path
        if output_path is None:
            output_filename = Path(target_file).stem + "_extracted.md"
            output_path = os.path.join(
                self.rag.config.cache_dir, output_filename
            )
        else:
            # NOTE(review): a caller-supplied path is used as-is after
            # resolution; confirm whether it should be restricted to
            # allowed directories.
            output_path = str(Path(output_path).resolve())

        # Write markdown file with metadata header
        markdown_content = f"""# Extracted Text from {Path(target_file).name}

**Source File:** {target_file}
**Extraction Date:** {metadata.get('index_time', 'Unknown')}
**Pages:** {metadata.get('num_pages', 'N/A')}
**VLM Pages:** {metadata.get('vlm_pages', 0)}
**Total Images:** {metadata.get('total_images', 0)}

---

{full_text}
"""

        # Ensure output directory exists ("." when the path has no directory part)
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        return {
            "status": "success",
            "output_path": output_path,
            "text_length": len(full_text),
            "num_pages": metadata.get("num_pages", "N/A"),
            "vlm_pages": metadata.get("vlm_pages", 0),
            "message": f"Exported extracted text to {output_path}",
        }

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising
        logger.error(f"Error dumping document: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "dump_document",
        }
1621
+
1622
@tool(
    name="index_directory",
    description="Index all supported files in a directory. Supports PDF, TXT, CSV, JSON, and code files.",
    parameters={
        "directory_path": {
            "type": "str",
            "description": "Path to directory to index",
            "required": True,
        },
        "recursive": {
            "type": "bool",
            "description": "Whether to recursively index subdirectories (default: False)",
            "required": False,
        },
    },
)
def index_directory(
    directory_path: str, recursive: bool = False
) -> Dict[str, Any]:
    """
    Index every supported file found in a directory.

    Files whose suffix is not in the supported set are counted as skipped;
    files the RAG SDK fails to index (falsy result or exception) are
    counted as failed.

    Args:
        directory_path: Directory to scan.
        recursive: When True, descend into subdirectories as well.

    Returns:
        A dict with indexed/failed/skipped counts, the indexed and failed
        file names, and a summary message; or an error dict when the path
        is missing, is not a directory, or an unexpected exception occurs.
    """
    try:
        root = Path(directory_path).resolve()

        # Validate the target before doing any work
        if not root.exists():
            return {
                "status": "error",
                "error": f"Directory does not exist: {directory_path}",
                "has_errors": True,
            }
        if not root.is_dir():
            return {
                "status": "error",
                "error": f"Path is not a directory: {directory_path}",
                "has_errors": True,
            }

        logger.info(f"Indexing directory: {root} (recursive={recursive})")

        # Suffixes (lower-case) that are handed to the RAG SDK
        indexable_suffixes = {
            ".pdf",
            ".txt",
            ".csv",
            ".json",
            ".py",
            ".js",
            ".java",
            ".cpp",
            ".c",
            ".h",
            ".md",
        }

        succeeded = []
        failed = []
        ignored = []

        # Materialize the listing up front, before any indexing happens
        walker = root.rglob("*") if recursive else root.iterdir()
        candidates = [entry for entry in walker if entry.is_file()]

        for candidate in candidates:
            candidate_str = str(candidate)

            if candidate.suffix.lower() not in indexable_suffixes:
                ignored.append(candidate_str)
                continue

            try:
                # Use the RAG SDK to index the file
                if self.rag.index_document(candidate_str):
                    succeeded.append(candidate_str)
                    logger.info(f"Indexed: {candidate.name}")
                else:
                    failed.append(candidate_str)
            except Exception as e:
                logger.warning(f"Failed to index {candidate}: {e}")
                failed.append(candidate_str)

        # Refresh the system prompt only if something new was indexed
        if succeeded and hasattr(self, "_update_system_prompt"):
            self._update_system_prompt()

        return {
            "status": "success",
            "indexed_count": len(succeeded),
            "failed_count": len(failed),
            "skipped_count": len(ignored),
            "indexed_files": [Path(item).name for item in succeeded],
            "failed_files": [Path(item).name for item in failed],
            "message": f"Indexed {len(succeeded)} files from {root.name}",
        }

    except Exception as e:
        logger.error(f"Error indexing directory: {e}")
        return {
            "status": "error",
            "error": str(e),
            "has_errors": True,
            "operation": "index_directory",
        }