amd-gaia 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (800)
  1. amd_gaia-0.14.1.dist-info/METADATA +768 -0
  2. amd_gaia-0.14.1.dist-info/RECORD +800 -0
  3. amd_gaia-0.14.1.dist-info/WHEEL +5 -0
  4. amd_gaia-0.14.1.dist-info/entry_points.txt +5 -0
  5. amd_gaia-0.14.1.dist-info/licenses/LICENSE.md +21 -0
  6. amd_gaia-0.14.1.dist-info/top_level.txt +1 -0
  7. gaia/__init__.py +2 -0
  8. gaia/agents/__init__.py +19 -0
  9. gaia/agents/base/__init__.py +9 -0
  10. gaia/agents/base/agent.py +2072 -0
  11. gaia/agents/base/api_agent.py +120 -0
  12. gaia/agents/base/console.py +1457 -0
  13. gaia/agents/base/mcp_agent.py +86 -0
  14. gaia/agents/base/tools.py +83 -0
  15. gaia/agents/blender/agent.py +556 -0
  16. gaia/agents/blender/agent_simple.py +135 -0
  17. gaia/agents/blender/app.py +211 -0
  18. gaia/agents/blender/app_simple.py +41 -0
  19. gaia/agents/blender/core/__init__.py +16 -0
  20. gaia/agents/blender/core/materials.py +506 -0
  21. gaia/agents/blender/core/objects.py +316 -0
  22. gaia/agents/blender/core/rendering.py +225 -0
  23. gaia/agents/blender/core/scene.py +220 -0
  24. gaia/agents/blender/core/view.py +146 -0
  25. gaia/agents/chat/__init__.py +9 -0
  26. gaia/agents/chat/agent.py +975 -0
  27. gaia/agents/chat/app.py +1058 -0
  28. gaia/agents/chat/session.py +508 -0
  29. gaia/agents/chat/tools/__init__.py +15 -0
  30. gaia/agents/chat/tools/file_tools.py +96 -0
  31. gaia/agents/chat/tools/rag_tools.py +1729 -0
  32. gaia/agents/chat/tools/shell_tools.py +436 -0
  33. gaia/agents/code/__init__.py +7 -0
  34. gaia/agents/code/agent.py +547 -0
  35. gaia/agents/code/app.py +266 -0
  36. gaia/agents/code/models.py +135 -0
  37. gaia/agents/code/orchestration/__init__.py +24 -0
  38. gaia/agents/code/orchestration/checklist_executor.py +1739 -0
  39. gaia/agents/code/orchestration/checklist_generator.py +709 -0
  40. gaia/agents/code/orchestration/factories/__init__.py +9 -0
  41. gaia/agents/code/orchestration/factories/base.py +63 -0
  42. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -0
  43. gaia/agents/code/orchestration/factories/python_factory.py +106 -0
  44. gaia/agents/code/orchestration/orchestrator.py +610 -0
  45. gaia/agents/code/orchestration/project_analyzer.py +391 -0
  46. gaia/agents/code/orchestration/steps/__init__.py +67 -0
  47. gaia/agents/code/orchestration/steps/base.py +188 -0
  48. gaia/agents/code/orchestration/steps/error_handler.py +314 -0
  49. gaia/agents/code/orchestration/steps/nextjs.py +828 -0
  50. gaia/agents/code/orchestration/steps/python.py +307 -0
  51. gaia/agents/code/orchestration/template_catalog.py +463 -0
  52. gaia/agents/code/orchestration/workflows/__init__.py +14 -0
  53. gaia/agents/code/orchestration/workflows/base.py +80 -0
  54. gaia/agents/code/orchestration/workflows/nextjs.py +186 -0
  55. gaia/agents/code/orchestration/workflows/python.py +94 -0
  56. gaia/agents/code/prompts/__init__.py +11 -0
  57. gaia/agents/code/prompts/base_prompt.py +77 -0
  58. gaia/agents/code/prompts/code_patterns.py +1925 -0
  59. gaia/agents/code/prompts/nextjs_prompt.py +40 -0
  60. gaia/agents/code/prompts/python_prompt.py +109 -0
  61. gaia/agents/code/schema_inference.py +365 -0
  62. gaia/agents/code/system_prompt.py +41 -0
  63. gaia/agents/code/tools/__init__.py +42 -0
  64. gaia/agents/code/tools/cli_tools.py +1138 -0
  65. gaia/agents/code/tools/code_formatting.py +319 -0
  66. gaia/agents/code/tools/code_tools.py +769 -0
  67. gaia/agents/code/tools/error_fixing.py +1347 -0
  68. gaia/agents/code/tools/external_tools.py +180 -0
  69. gaia/agents/code/tools/file_io.py +845 -0
  70. gaia/agents/code/tools/prisma_tools.py +190 -0
  71. gaia/agents/code/tools/project_management.py +1016 -0
  72. gaia/agents/code/tools/testing.py +321 -0
  73. gaia/agents/code/tools/typescript_tools.py +122 -0
  74. gaia/agents/code/tools/validation_parsing.py +461 -0
  75. gaia/agents/code/tools/validation_tools.py +803 -0
  76. gaia/agents/code/tools/web_dev_tools.py +1744 -0
  77. gaia/agents/code/validators/__init__.py +16 -0
  78. gaia/agents/code/validators/antipattern_checker.py +241 -0
  79. gaia/agents/code/validators/ast_analyzer.py +197 -0
  80. gaia/agents/code/validators/requirements_validator.py +145 -0
  81. gaia/agents/code/validators/syntax_validator.py +171 -0
  82. gaia/agents/docker/__init__.py +7 -0
  83. gaia/agents/docker/agent.py +642 -0
  84. gaia/agents/jira/__init__.py +11 -0
  85. gaia/agents/jira/agent.py +894 -0
  86. gaia/agents/jira/jql_templates.py +299 -0
  87. gaia/agents/routing/__init__.py +7 -0
  88. gaia/agents/routing/agent.py +512 -0
  89. gaia/agents/routing/system_prompt.py +75 -0
  90. gaia/api/__init__.py +23 -0
  91. gaia/api/agent_registry.py +238 -0
  92. gaia/api/app.py +305 -0
  93. gaia/api/openai_server.py +575 -0
  94. gaia/api/schemas.py +186 -0
  95. gaia/api/sse_handler.py +370 -0
  96. gaia/apps/__init__.py +4 -0
  97. gaia/apps/llm/__init__.py +6 -0
  98. gaia/apps/llm/app.py +169 -0
  99. gaia/apps/summarize/app.py +633 -0
  100. gaia/apps/summarize/html_viewer.py +133 -0
  101. gaia/apps/summarize/pdf_formatter.py +284 -0
  102. gaia/audio/__init__.py +2 -0
  103. gaia/audio/audio_client.py +439 -0
  104. gaia/audio/audio_recorder.py +269 -0
  105. gaia/audio/kokoro_tts.py +599 -0
  106. gaia/audio/whisper_asr.py +432 -0
  107. gaia/chat/__init__.py +16 -0
  108. gaia/chat/app.py +430 -0
  109. gaia/chat/prompts.py +522 -0
  110. gaia/chat/sdk.py +1200 -0
  111. gaia/cli.py +5621 -0
  112. gaia/eval/batch_experiment.py +2332 -0
  113. gaia/eval/claude.py +542 -0
  114. gaia/eval/config.py +37 -0
  115. gaia/eval/email_generator.py +512 -0
  116. gaia/eval/eval.py +3179 -0
  117. gaia/eval/groundtruth.py +1130 -0
  118. gaia/eval/transcript_generator.py +582 -0
  119. gaia/eval/webapp/README.md +168 -0
  120. gaia/eval/webapp/node_modules/.bin/mime +16 -0
  121. gaia/eval/webapp/node_modules/.bin/mime.cmd +17 -0
  122. gaia/eval/webapp/node_modules/.bin/mime.ps1 +28 -0
  123. gaia/eval/webapp/node_modules/.package-lock.json +865 -0
  124. gaia/eval/webapp/node_modules/accepts/HISTORY.md +243 -0
  125. gaia/eval/webapp/node_modules/accepts/LICENSE +23 -0
  126. gaia/eval/webapp/node_modules/accepts/README.md +140 -0
  127. gaia/eval/webapp/node_modules/accepts/index.js +238 -0
  128. gaia/eval/webapp/node_modules/accepts/package.json +47 -0
  129. gaia/eval/webapp/node_modules/array-flatten/LICENSE +21 -0
  130. gaia/eval/webapp/node_modules/array-flatten/README.md +43 -0
  131. gaia/eval/webapp/node_modules/array-flatten/array-flatten.js +64 -0
  132. gaia/eval/webapp/node_modules/array-flatten/package.json +39 -0
  133. gaia/eval/webapp/node_modules/body-parser/HISTORY.md +672 -0
  134. gaia/eval/webapp/node_modules/body-parser/LICENSE +23 -0
  135. gaia/eval/webapp/node_modules/body-parser/README.md +476 -0
  136. gaia/eval/webapp/node_modules/body-parser/SECURITY.md +25 -0
  137. gaia/eval/webapp/node_modules/body-parser/index.js +156 -0
  138. gaia/eval/webapp/node_modules/body-parser/lib/read.js +205 -0
  139. gaia/eval/webapp/node_modules/body-parser/lib/types/json.js +247 -0
  140. gaia/eval/webapp/node_modules/body-parser/lib/types/raw.js +101 -0
  141. gaia/eval/webapp/node_modules/body-parser/lib/types/text.js +121 -0
  142. gaia/eval/webapp/node_modules/body-parser/lib/types/urlencoded.js +307 -0
  143. gaia/eval/webapp/node_modules/body-parser/package.json +56 -0
  144. gaia/eval/webapp/node_modules/bytes/History.md +97 -0
  145. gaia/eval/webapp/node_modules/bytes/LICENSE +23 -0
  146. gaia/eval/webapp/node_modules/bytes/Readme.md +152 -0
  147. gaia/eval/webapp/node_modules/bytes/index.js +170 -0
  148. gaia/eval/webapp/node_modules/bytes/package.json +42 -0
  149. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.eslintrc +17 -0
  150. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.github/FUNDING.yml +12 -0
  151. gaia/eval/webapp/node_modules/call-bind-apply-helpers/.nycrc +9 -0
  152. gaia/eval/webapp/node_modules/call-bind-apply-helpers/CHANGELOG.md +30 -0
  153. gaia/eval/webapp/node_modules/call-bind-apply-helpers/LICENSE +21 -0
  154. gaia/eval/webapp/node_modules/call-bind-apply-helpers/README.md +62 -0
  155. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.d.ts +1 -0
  156. gaia/eval/webapp/node_modules/call-bind-apply-helpers/actualApply.js +10 -0
  157. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.d.ts +19 -0
  158. gaia/eval/webapp/node_modules/call-bind-apply-helpers/applyBind.js +10 -0
  159. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.d.ts +1 -0
  160. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionApply.js +4 -0
  161. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.d.ts +1 -0
  162. gaia/eval/webapp/node_modules/call-bind-apply-helpers/functionCall.js +4 -0
  163. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.d.ts +64 -0
  164. gaia/eval/webapp/node_modules/call-bind-apply-helpers/index.js +15 -0
  165. gaia/eval/webapp/node_modules/call-bind-apply-helpers/package.json +85 -0
  166. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.d.ts +3 -0
  167. gaia/eval/webapp/node_modules/call-bind-apply-helpers/reflectApply.js +4 -0
  168. gaia/eval/webapp/node_modules/call-bind-apply-helpers/test/index.js +63 -0
  169. gaia/eval/webapp/node_modules/call-bind-apply-helpers/tsconfig.json +9 -0
  170. gaia/eval/webapp/node_modules/call-bound/.eslintrc +13 -0
  171. gaia/eval/webapp/node_modules/call-bound/.github/FUNDING.yml +12 -0
  172. gaia/eval/webapp/node_modules/call-bound/.nycrc +9 -0
  173. gaia/eval/webapp/node_modules/call-bound/CHANGELOG.md +42 -0
  174. gaia/eval/webapp/node_modules/call-bound/LICENSE +21 -0
  175. gaia/eval/webapp/node_modules/call-bound/README.md +53 -0
  176. gaia/eval/webapp/node_modules/call-bound/index.d.ts +94 -0
  177. gaia/eval/webapp/node_modules/call-bound/index.js +19 -0
  178. gaia/eval/webapp/node_modules/call-bound/package.json +99 -0
  179. gaia/eval/webapp/node_modules/call-bound/test/index.js +61 -0
  180. gaia/eval/webapp/node_modules/call-bound/tsconfig.json +10 -0
  181. gaia/eval/webapp/node_modules/content-disposition/HISTORY.md +60 -0
  182. gaia/eval/webapp/node_modules/content-disposition/LICENSE +22 -0
  183. gaia/eval/webapp/node_modules/content-disposition/README.md +142 -0
  184. gaia/eval/webapp/node_modules/content-disposition/index.js +458 -0
  185. gaia/eval/webapp/node_modules/content-disposition/package.json +44 -0
  186. gaia/eval/webapp/node_modules/content-type/HISTORY.md +29 -0
  187. gaia/eval/webapp/node_modules/content-type/LICENSE +22 -0
  188. gaia/eval/webapp/node_modules/content-type/README.md +94 -0
  189. gaia/eval/webapp/node_modules/content-type/index.js +225 -0
  190. gaia/eval/webapp/node_modules/content-type/package.json +42 -0
  191. gaia/eval/webapp/node_modules/cookie/LICENSE +24 -0
  192. gaia/eval/webapp/node_modules/cookie/README.md +317 -0
  193. gaia/eval/webapp/node_modules/cookie/SECURITY.md +25 -0
  194. gaia/eval/webapp/node_modules/cookie/index.js +334 -0
  195. gaia/eval/webapp/node_modules/cookie/package.json +44 -0
  196. gaia/eval/webapp/node_modules/cookie-signature/.npmignore +4 -0
  197. gaia/eval/webapp/node_modules/cookie-signature/History.md +38 -0
  198. gaia/eval/webapp/node_modules/cookie-signature/Readme.md +42 -0
  199. gaia/eval/webapp/node_modules/cookie-signature/index.js +51 -0
  200. gaia/eval/webapp/node_modules/cookie-signature/package.json +18 -0
  201. gaia/eval/webapp/node_modules/debug/.coveralls.yml +1 -0
  202. gaia/eval/webapp/node_modules/debug/.eslintrc +11 -0
  203. gaia/eval/webapp/node_modules/debug/.npmignore +9 -0
  204. gaia/eval/webapp/node_modules/debug/.travis.yml +14 -0
  205. gaia/eval/webapp/node_modules/debug/CHANGELOG.md +362 -0
  206. gaia/eval/webapp/node_modules/debug/LICENSE +19 -0
  207. gaia/eval/webapp/node_modules/debug/Makefile +50 -0
  208. gaia/eval/webapp/node_modules/debug/README.md +312 -0
  209. gaia/eval/webapp/node_modules/debug/component.json +19 -0
  210. gaia/eval/webapp/node_modules/debug/karma.conf.js +70 -0
  211. gaia/eval/webapp/node_modules/debug/node.js +1 -0
  212. gaia/eval/webapp/node_modules/debug/package.json +49 -0
  213. gaia/eval/webapp/node_modules/debug/src/browser.js +185 -0
  214. gaia/eval/webapp/node_modules/debug/src/debug.js +202 -0
  215. gaia/eval/webapp/node_modules/debug/src/index.js +10 -0
  216. gaia/eval/webapp/node_modules/debug/src/inspector-log.js +15 -0
  217. gaia/eval/webapp/node_modules/debug/src/node.js +248 -0
  218. gaia/eval/webapp/node_modules/depd/History.md +103 -0
  219. gaia/eval/webapp/node_modules/depd/LICENSE +22 -0
  220. gaia/eval/webapp/node_modules/depd/Readme.md +280 -0
  221. gaia/eval/webapp/node_modules/depd/index.js +538 -0
  222. gaia/eval/webapp/node_modules/depd/lib/browser/index.js +77 -0
  223. gaia/eval/webapp/node_modules/depd/package.json +45 -0
  224. gaia/eval/webapp/node_modules/destroy/LICENSE +23 -0
  225. gaia/eval/webapp/node_modules/destroy/README.md +63 -0
  226. gaia/eval/webapp/node_modules/destroy/index.js +209 -0
  227. gaia/eval/webapp/node_modules/destroy/package.json +48 -0
  228. gaia/eval/webapp/node_modules/dunder-proto/.eslintrc +5 -0
  229. gaia/eval/webapp/node_modules/dunder-proto/.github/FUNDING.yml +12 -0
  230. gaia/eval/webapp/node_modules/dunder-proto/.nycrc +13 -0
  231. gaia/eval/webapp/node_modules/dunder-proto/CHANGELOG.md +24 -0
  232. gaia/eval/webapp/node_modules/dunder-proto/LICENSE +21 -0
  233. gaia/eval/webapp/node_modules/dunder-proto/README.md +54 -0
  234. gaia/eval/webapp/node_modules/dunder-proto/get.d.ts +5 -0
  235. gaia/eval/webapp/node_modules/dunder-proto/get.js +30 -0
  236. gaia/eval/webapp/node_modules/dunder-proto/package.json +76 -0
  237. gaia/eval/webapp/node_modules/dunder-proto/set.d.ts +5 -0
  238. gaia/eval/webapp/node_modules/dunder-proto/set.js +35 -0
  239. gaia/eval/webapp/node_modules/dunder-proto/test/get.js +34 -0
  240. gaia/eval/webapp/node_modules/dunder-proto/test/index.js +4 -0
  241. gaia/eval/webapp/node_modules/dunder-proto/test/set.js +50 -0
  242. gaia/eval/webapp/node_modules/dunder-proto/tsconfig.json +9 -0
  243. gaia/eval/webapp/node_modules/ee-first/LICENSE +22 -0
  244. gaia/eval/webapp/node_modules/ee-first/README.md +80 -0
  245. gaia/eval/webapp/node_modules/ee-first/index.js +95 -0
  246. gaia/eval/webapp/node_modules/ee-first/package.json +29 -0
  247. gaia/eval/webapp/node_modules/encodeurl/LICENSE +22 -0
  248. gaia/eval/webapp/node_modules/encodeurl/README.md +109 -0
  249. gaia/eval/webapp/node_modules/encodeurl/index.js +60 -0
  250. gaia/eval/webapp/node_modules/encodeurl/package.json +40 -0
  251. gaia/eval/webapp/node_modules/es-define-property/.eslintrc +13 -0
  252. gaia/eval/webapp/node_modules/es-define-property/.github/FUNDING.yml +12 -0
  253. gaia/eval/webapp/node_modules/es-define-property/.nycrc +9 -0
  254. gaia/eval/webapp/node_modules/es-define-property/CHANGELOG.md +29 -0
  255. gaia/eval/webapp/node_modules/es-define-property/LICENSE +21 -0
  256. gaia/eval/webapp/node_modules/es-define-property/README.md +49 -0
  257. gaia/eval/webapp/node_modules/es-define-property/index.d.ts +3 -0
  258. gaia/eval/webapp/node_modules/es-define-property/index.js +14 -0
  259. gaia/eval/webapp/node_modules/es-define-property/package.json +81 -0
  260. gaia/eval/webapp/node_modules/es-define-property/test/index.js +56 -0
  261. gaia/eval/webapp/node_modules/es-define-property/tsconfig.json +10 -0
  262. gaia/eval/webapp/node_modules/es-errors/.eslintrc +5 -0
  263. gaia/eval/webapp/node_modules/es-errors/.github/FUNDING.yml +12 -0
  264. gaia/eval/webapp/node_modules/es-errors/CHANGELOG.md +40 -0
  265. gaia/eval/webapp/node_modules/es-errors/LICENSE +21 -0
  266. gaia/eval/webapp/node_modules/es-errors/README.md +55 -0
  267. gaia/eval/webapp/node_modules/es-errors/eval.d.ts +3 -0
  268. gaia/eval/webapp/node_modules/es-errors/eval.js +4 -0
  269. gaia/eval/webapp/node_modules/es-errors/index.d.ts +3 -0
  270. gaia/eval/webapp/node_modules/es-errors/index.js +4 -0
  271. gaia/eval/webapp/node_modules/es-errors/package.json +80 -0
  272. gaia/eval/webapp/node_modules/es-errors/range.d.ts +3 -0
  273. gaia/eval/webapp/node_modules/es-errors/range.js +4 -0
  274. gaia/eval/webapp/node_modules/es-errors/ref.d.ts +3 -0
  275. gaia/eval/webapp/node_modules/es-errors/ref.js +4 -0
  276. gaia/eval/webapp/node_modules/es-errors/syntax.d.ts +3 -0
  277. gaia/eval/webapp/node_modules/es-errors/syntax.js +4 -0
  278. gaia/eval/webapp/node_modules/es-errors/test/index.js +19 -0
  279. gaia/eval/webapp/node_modules/es-errors/tsconfig.json +49 -0
  280. gaia/eval/webapp/node_modules/es-errors/type.d.ts +3 -0
  281. gaia/eval/webapp/node_modules/es-errors/type.js +4 -0
  282. gaia/eval/webapp/node_modules/es-errors/uri.d.ts +3 -0
  283. gaia/eval/webapp/node_modules/es-errors/uri.js +4 -0
  284. gaia/eval/webapp/node_modules/es-object-atoms/.eslintrc +16 -0
  285. gaia/eval/webapp/node_modules/es-object-atoms/.github/FUNDING.yml +12 -0
  286. gaia/eval/webapp/node_modules/es-object-atoms/CHANGELOG.md +37 -0
  287. gaia/eval/webapp/node_modules/es-object-atoms/LICENSE +21 -0
  288. gaia/eval/webapp/node_modules/es-object-atoms/README.md +63 -0
  289. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.d.ts +3 -0
  290. gaia/eval/webapp/node_modules/es-object-atoms/RequireObjectCoercible.js +11 -0
  291. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.d.ts +7 -0
  292. gaia/eval/webapp/node_modules/es-object-atoms/ToObject.js +10 -0
  293. gaia/eval/webapp/node_modules/es-object-atoms/index.d.ts +3 -0
  294. gaia/eval/webapp/node_modules/es-object-atoms/index.js +4 -0
  295. gaia/eval/webapp/node_modules/es-object-atoms/isObject.d.ts +3 -0
  296. gaia/eval/webapp/node_modules/es-object-atoms/isObject.js +6 -0
  297. gaia/eval/webapp/node_modules/es-object-atoms/package.json +80 -0
  298. gaia/eval/webapp/node_modules/es-object-atoms/test/index.js +38 -0
  299. gaia/eval/webapp/node_modules/es-object-atoms/tsconfig.json +6 -0
  300. gaia/eval/webapp/node_modules/escape-html/LICENSE +24 -0
  301. gaia/eval/webapp/node_modules/escape-html/Readme.md +43 -0
  302. gaia/eval/webapp/node_modules/escape-html/index.js +78 -0
  303. gaia/eval/webapp/node_modules/escape-html/package.json +24 -0
  304. gaia/eval/webapp/node_modules/etag/HISTORY.md +83 -0
  305. gaia/eval/webapp/node_modules/etag/LICENSE +22 -0
  306. gaia/eval/webapp/node_modules/etag/README.md +159 -0
  307. gaia/eval/webapp/node_modules/etag/index.js +131 -0
  308. gaia/eval/webapp/node_modules/etag/package.json +47 -0
  309. gaia/eval/webapp/node_modules/express/History.md +3656 -0
  310. gaia/eval/webapp/node_modules/express/LICENSE +24 -0
  311. gaia/eval/webapp/node_modules/express/Readme.md +260 -0
  312. gaia/eval/webapp/node_modules/express/index.js +11 -0
  313. gaia/eval/webapp/node_modules/express/lib/application.js +661 -0
  314. gaia/eval/webapp/node_modules/express/lib/express.js +116 -0
  315. gaia/eval/webapp/node_modules/express/lib/middleware/init.js +43 -0
  316. gaia/eval/webapp/node_modules/express/lib/middleware/query.js +47 -0
  317. gaia/eval/webapp/node_modules/express/lib/request.js +525 -0
  318. gaia/eval/webapp/node_modules/express/lib/response.js +1179 -0
  319. gaia/eval/webapp/node_modules/express/lib/router/index.js +673 -0
  320. gaia/eval/webapp/node_modules/express/lib/router/layer.js +181 -0
  321. gaia/eval/webapp/node_modules/express/lib/router/route.js +230 -0
  322. gaia/eval/webapp/node_modules/express/lib/utils.js +303 -0
  323. gaia/eval/webapp/node_modules/express/lib/view.js +182 -0
  324. gaia/eval/webapp/node_modules/express/package.json +102 -0
  325. gaia/eval/webapp/node_modules/finalhandler/HISTORY.md +210 -0
  326. gaia/eval/webapp/node_modules/finalhandler/LICENSE +22 -0
  327. gaia/eval/webapp/node_modules/finalhandler/README.md +147 -0
  328. gaia/eval/webapp/node_modules/finalhandler/SECURITY.md +25 -0
  329. gaia/eval/webapp/node_modules/finalhandler/index.js +341 -0
  330. gaia/eval/webapp/node_modules/finalhandler/package.json +47 -0
  331. gaia/eval/webapp/node_modules/forwarded/HISTORY.md +21 -0
  332. gaia/eval/webapp/node_modules/forwarded/LICENSE +22 -0
  333. gaia/eval/webapp/node_modules/forwarded/README.md +57 -0
  334. gaia/eval/webapp/node_modules/forwarded/index.js +90 -0
  335. gaia/eval/webapp/node_modules/forwarded/package.json +45 -0
  336. gaia/eval/webapp/node_modules/fresh/HISTORY.md +70 -0
  337. gaia/eval/webapp/node_modules/fresh/LICENSE +23 -0
  338. gaia/eval/webapp/node_modules/fresh/README.md +119 -0
  339. gaia/eval/webapp/node_modules/fresh/index.js +137 -0
  340. gaia/eval/webapp/node_modules/fresh/package.json +46 -0
  341. gaia/eval/webapp/node_modules/fs/README.md +9 -0
  342. gaia/eval/webapp/node_modules/fs/package.json +20 -0
  343. gaia/eval/webapp/node_modules/function-bind/.eslintrc +21 -0
  344. gaia/eval/webapp/node_modules/function-bind/.github/FUNDING.yml +12 -0
  345. gaia/eval/webapp/node_modules/function-bind/.github/SECURITY.md +3 -0
  346. gaia/eval/webapp/node_modules/function-bind/.nycrc +13 -0
  347. gaia/eval/webapp/node_modules/function-bind/CHANGELOG.md +136 -0
  348. gaia/eval/webapp/node_modules/function-bind/LICENSE +20 -0
  349. gaia/eval/webapp/node_modules/function-bind/README.md +46 -0
  350. gaia/eval/webapp/node_modules/function-bind/implementation.js +84 -0
  351. gaia/eval/webapp/node_modules/function-bind/index.js +5 -0
  352. gaia/eval/webapp/node_modules/function-bind/package.json +87 -0
  353. gaia/eval/webapp/node_modules/function-bind/test/.eslintrc +9 -0
  354. gaia/eval/webapp/node_modules/function-bind/test/index.js +252 -0
  355. gaia/eval/webapp/node_modules/get-intrinsic/.eslintrc +42 -0
  356. gaia/eval/webapp/node_modules/get-intrinsic/.github/FUNDING.yml +12 -0
  357. gaia/eval/webapp/node_modules/get-intrinsic/.nycrc +9 -0
  358. gaia/eval/webapp/node_modules/get-intrinsic/CHANGELOG.md +186 -0
  359. gaia/eval/webapp/node_modules/get-intrinsic/LICENSE +21 -0
  360. gaia/eval/webapp/node_modules/get-intrinsic/README.md +71 -0
  361. gaia/eval/webapp/node_modules/get-intrinsic/index.js +378 -0
  362. gaia/eval/webapp/node_modules/get-intrinsic/package.json +97 -0
  363. gaia/eval/webapp/node_modules/get-intrinsic/test/GetIntrinsic.js +274 -0
  364. gaia/eval/webapp/node_modules/get-proto/.eslintrc +10 -0
  365. gaia/eval/webapp/node_modules/get-proto/.github/FUNDING.yml +12 -0
  366. gaia/eval/webapp/node_modules/get-proto/.nycrc +9 -0
  367. gaia/eval/webapp/node_modules/get-proto/CHANGELOG.md +21 -0
  368. gaia/eval/webapp/node_modules/get-proto/LICENSE +21 -0
  369. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.d.ts +5 -0
  370. gaia/eval/webapp/node_modules/get-proto/Object.getPrototypeOf.js +6 -0
  371. gaia/eval/webapp/node_modules/get-proto/README.md +50 -0
  372. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.d.ts +3 -0
  373. gaia/eval/webapp/node_modules/get-proto/Reflect.getPrototypeOf.js +4 -0
  374. gaia/eval/webapp/node_modules/get-proto/index.d.ts +5 -0
  375. gaia/eval/webapp/node_modules/get-proto/index.js +27 -0
  376. gaia/eval/webapp/node_modules/get-proto/package.json +81 -0
  377. gaia/eval/webapp/node_modules/get-proto/test/index.js +68 -0
  378. gaia/eval/webapp/node_modules/get-proto/tsconfig.json +9 -0
  379. gaia/eval/webapp/node_modules/gopd/.eslintrc +16 -0
  380. gaia/eval/webapp/node_modules/gopd/.github/FUNDING.yml +12 -0
  381. gaia/eval/webapp/node_modules/gopd/CHANGELOG.md +45 -0
  382. gaia/eval/webapp/node_modules/gopd/LICENSE +21 -0
  383. gaia/eval/webapp/node_modules/gopd/README.md +40 -0
  384. gaia/eval/webapp/node_modules/gopd/gOPD.d.ts +1 -0
  385. gaia/eval/webapp/node_modules/gopd/gOPD.js +4 -0
  386. gaia/eval/webapp/node_modules/gopd/index.d.ts +5 -0
  387. gaia/eval/webapp/node_modules/gopd/index.js +15 -0
  388. gaia/eval/webapp/node_modules/gopd/package.json +77 -0
  389. gaia/eval/webapp/node_modules/gopd/test/index.js +36 -0
  390. gaia/eval/webapp/node_modules/gopd/tsconfig.json +9 -0
  391. gaia/eval/webapp/node_modules/has-symbols/.eslintrc +11 -0
  392. gaia/eval/webapp/node_modules/has-symbols/.github/FUNDING.yml +12 -0
  393. gaia/eval/webapp/node_modules/has-symbols/.nycrc +9 -0
  394. gaia/eval/webapp/node_modules/has-symbols/CHANGELOG.md +91 -0
  395. gaia/eval/webapp/node_modules/has-symbols/LICENSE +21 -0
  396. gaia/eval/webapp/node_modules/has-symbols/README.md +46 -0
  397. gaia/eval/webapp/node_modules/has-symbols/index.d.ts +3 -0
  398. gaia/eval/webapp/node_modules/has-symbols/index.js +14 -0
  399. gaia/eval/webapp/node_modules/has-symbols/package.json +111 -0
  400. gaia/eval/webapp/node_modules/has-symbols/shams.d.ts +3 -0
  401. gaia/eval/webapp/node_modules/has-symbols/shams.js +45 -0
  402. gaia/eval/webapp/node_modules/has-symbols/test/index.js +22 -0
  403. gaia/eval/webapp/node_modules/has-symbols/test/shams/core-js.js +29 -0
  404. gaia/eval/webapp/node_modules/has-symbols/test/shams/get-own-property-symbols.js +29 -0
  405. gaia/eval/webapp/node_modules/has-symbols/test/tests.js +58 -0
  406. gaia/eval/webapp/node_modules/has-symbols/tsconfig.json +10 -0
  407. gaia/eval/webapp/node_modules/hasown/.eslintrc +5 -0
  408. gaia/eval/webapp/node_modules/hasown/.github/FUNDING.yml +12 -0
  409. gaia/eval/webapp/node_modules/hasown/.nycrc +13 -0
  410. gaia/eval/webapp/node_modules/hasown/CHANGELOG.md +40 -0
  411. gaia/eval/webapp/node_modules/hasown/LICENSE +21 -0
  412. gaia/eval/webapp/node_modules/hasown/README.md +40 -0
  413. gaia/eval/webapp/node_modules/hasown/index.d.ts +3 -0
  414. gaia/eval/webapp/node_modules/hasown/index.js +8 -0
  415. gaia/eval/webapp/node_modules/hasown/package.json +92 -0
  416. gaia/eval/webapp/node_modules/hasown/tsconfig.json +6 -0
  417. gaia/eval/webapp/node_modules/http-errors/HISTORY.md +180 -0
  418. gaia/eval/webapp/node_modules/http-errors/LICENSE +23 -0
  419. gaia/eval/webapp/node_modules/http-errors/README.md +169 -0
  420. gaia/eval/webapp/node_modules/http-errors/index.js +289 -0
  421. gaia/eval/webapp/node_modules/http-errors/package.json +50 -0
  422. gaia/eval/webapp/node_modules/iconv-lite/Changelog.md +162 -0
  423. gaia/eval/webapp/node_modules/iconv-lite/LICENSE +21 -0
  424. gaia/eval/webapp/node_modules/iconv-lite/README.md +156 -0
  425. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-codec.js +555 -0
  426. gaia/eval/webapp/node_modules/iconv-lite/encodings/dbcs-data.js +176 -0
  427. gaia/eval/webapp/node_modules/iconv-lite/encodings/index.js +22 -0
  428. gaia/eval/webapp/node_modules/iconv-lite/encodings/internal.js +188 -0
  429. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-codec.js +72 -0
  430. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data-generated.js +451 -0
  431. gaia/eval/webapp/node_modules/iconv-lite/encodings/sbcs-data.js +174 -0
  432. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/big5-added.json +122 -0
  433. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp936.json +264 -0
  434. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp949.json +273 -0
  435. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/cp950.json +177 -0
  436. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/eucjp.json +182 -0
  437. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gb18030-ranges.json +1 -0
  438. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/gbk-added.json +55 -0
  439. gaia/eval/webapp/node_modules/iconv-lite/encodings/tables/shiftjis.json +125 -0
  440. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf16.js +177 -0
  441. gaia/eval/webapp/node_modules/iconv-lite/encodings/utf7.js +290 -0
  442. gaia/eval/webapp/node_modules/iconv-lite/lib/bom-handling.js +52 -0
  443. gaia/eval/webapp/node_modules/iconv-lite/lib/extend-node.js +217 -0
  444. gaia/eval/webapp/node_modules/iconv-lite/lib/index.d.ts +24 -0
  445. gaia/eval/webapp/node_modules/iconv-lite/lib/index.js +153 -0
  446. gaia/eval/webapp/node_modules/iconv-lite/lib/streams.js +121 -0
  447. gaia/eval/webapp/node_modules/iconv-lite/package.json +46 -0
  448. gaia/eval/webapp/node_modules/inherits/LICENSE +16 -0
  449. gaia/eval/webapp/node_modules/inherits/README.md +42 -0
  450. gaia/eval/webapp/node_modules/inherits/inherits.js +9 -0
  451. gaia/eval/webapp/node_modules/inherits/inherits_browser.js +27 -0
  452. gaia/eval/webapp/node_modules/inherits/package.json +29 -0
  453. gaia/eval/webapp/node_modules/ipaddr.js/LICENSE +19 -0
  454. gaia/eval/webapp/node_modules/ipaddr.js/README.md +233 -0
  455. gaia/eval/webapp/node_modules/ipaddr.js/ipaddr.min.js +1 -0
  456. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js +673 -0
  457. gaia/eval/webapp/node_modules/ipaddr.js/lib/ipaddr.js.d.ts +68 -0
  458. gaia/eval/webapp/node_modules/ipaddr.js/package.json +35 -0
  459. gaia/eval/webapp/node_modules/math-intrinsics/.eslintrc +16 -0
  460. gaia/eval/webapp/node_modules/math-intrinsics/.github/FUNDING.yml +12 -0
  461. gaia/eval/webapp/node_modules/math-intrinsics/CHANGELOG.md +24 -0
  462. gaia/eval/webapp/node_modules/math-intrinsics/LICENSE +21 -0
  463. gaia/eval/webapp/node_modules/math-intrinsics/README.md +50 -0
  464. gaia/eval/webapp/node_modules/math-intrinsics/abs.d.ts +1 -0
  465. gaia/eval/webapp/node_modules/math-intrinsics/abs.js +4 -0
  466. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.d.ts +3 -0
  467. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxArrayLength.js +4 -0
  468. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.d.ts +3 -0
  469. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxSafeInteger.js +5 -0
  470. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.d.ts +3 -0
  471. gaia/eval/webapp/node_modules/math-intrinsics/constants/maxValue.js +5 -0
  472. gaia/eval/webapp/node_modules/math-intrinsics/floor.d.ts +1 -0
  473. gaia/eval/webapp/node_modules/math-intrinsics/floor.js +4 -0
  474. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.d.ts +3 -0
  475. gaia/eval/webapp/node_modules/math-intrinsics/isFinite.js +12 -0
  476. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.d.ts +3 -0
  477. gaia/eval/webapp/node_modules/math-intrinsics/isInteger.js +16 -0
  478. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.d.ts +1 -0
  479. gaia/eval/webapp/node_modules/math-intrinsics/isNaN.js +6 -0
  480. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.d.ts +3 -0
  481. gaia/eval/webapp/node_modules/math-intrinsics/isNegativeZero.js +6 -0
  482. gaia/eval/webapp/node_modules/math-intrinsics/max.d.ts +1 -0
  483. gaia/eval/webapp/node_modules/math-intrinsics/max.js +4 -0
  484. gaia/eval/webapp/node_modules/math-intrinsics/min.d.ts +1 -0
  485. gaia/eval/webapp/node_modules/math-intrinsics/min.js +4 -0
  486. gaia/eval/webapp/node_modules/math-intrinsics/mod.d.ts +3 -0
  487. gaia/eval/webapp/node_modules/math-intrinsics/mod.js +9 -0
  488. gaia/eval/webapp/node_modules/math-intrinsics/package.json +86 -0
  489. gaia/eval/webapp/node_modules/math-intrinsics/pow.d.ts +1 -0
  490. gaia/eval/webapp/node_modules/math-intrinsics/pow.js +4 -0
  491. gaia/eval/webapp/node_modules/math-intrinsics/round.d.ts +1 -0
  492. gaia/eval/webapp/node_modules/math-intrinsics/round.js +4 -0
  493. gaia/eval/webapp/node_modules/math-intrinsics/sign.d.ts +3 -0
  494. gaia/eval/webapp/node_modules/math-intrinsics/sign.js +11 -0
  495. gaia/eval/webapp/node_modules/math-intrinsics/test/index.js +192 -0
  496. gaia/eval/webapp/node_modules/math-intrinsics/tsconfig.json +3 -0
  497. gaia/eval/webapp/node_modules/media-typer/HISTORY.md +22 -0
  498. gaia/eval/webapp/node_modules/media-typer/LICENSE +22 -0
  499. gaia/eval/webapp/node_modules/media-typer/README.md +81 -0
  500. gaia/eval/webapp/node_modules/media-typer/index.js +270 -0
  501. gaia/eval/webapp/node_modules/media-typer/package.json +26 -0
  502. gaia/eval/webapp/node_modules/merge-descriptors/HISTORY.md +21 -0
  503. gaia/eval/webapp/node_modules/merge-descriptors/LICENSE +23 -0
  504. gaia/eval/webapp/node_modules/merge-descriptors/README.md +49 -0
  505. gaia/eval/webapp/node_modules/merge-descriptors/index.js +60 -0
  506. gaia/eval/webapp/node_modules/merge-descriptors/package.json +39 -0
  507. gaia/eval/webapp/node_modules/methods/HISTORY.md +29 -0
  508. gaia/eval/webapp/node_modules/methods/LICENSE +24 -0
  509. gaia/eval/webapp/node_modules/methods/README.md +51 -0
  510. gaia/eval/webapp/node_modules/methods/index.js +69 -0
  511. gaia/eval/webapp/node_modules/methods/package.json +36 -0
  512. gaia/eval/webapp/node_modules/mime/.npmignore +0 -0
  513. gaia/eval/webapp/node_modules/mime/CHANGELOG.md +164 -0
  514. gaia/eval/webapp/node_modules/mime/LICENSE +21 -0
  515. gaia/eval/webapp/node_modules/mime/README.md +90 -0
  516. gaia/eval/webapp/node_modules/mime/cli.js +8 -0
  517. gaia/eval/webapp/node_modules/mime/mime.js +108 -0
  518. gaia/eval/webapp/node_modules/mime/package.json +44 -0
  519. gaia/eval/webapp/node_modules/mime/src/build.js +53 -0
  520. gaia/eval/webapp/node_modules/mime/src/test.js +60 -0
  521. gaia/eval/webapp/node_modules/mime/types.json +1 -0
  522. gaia/eval/webapp/node_modules/mime-db/HISTORY.md +507 -0
  523. gaia/eval/webapp/node_modules/mime-db/LICENSE +23 -0
  524. gaia/eval/webapp/node_modules/mime-db/README.md +100 -0
  525. gaia/eval/webapp/node_modules/mime-db/db.json +8519 -0
  526. gaia/eval/webapp/node_modules/mime-db/index.js +12 -0
  527. gaia/eval/webapp/node_modules/mime-db/package.json +60 -0
  528. gaia/eval/webapp/node_modules/mime-types/HISTORY.md +397 -0
  529. gaia/eval/webapp/node_modules/mime-types/LICENSE +23 -0
  530. gaia/eval/webapp/node_modules/mime-types/README.md +113 -0
  531. gaia/eval/webapp/node_modules/mime-types/index.js +188 -0
  532. gaia/eval/webapp/node_modules/mime-types/package.json +44 -0
  533. gaia/eval/webapp/node_modules/ms/index.js +152 -0
  534. gaia/eval/webapp/node_modules/ms/license.md +21 -0
  535. gaia/eval/webapp/node_modules/ms/package.json +37 -0
  536. gaia/eval/webapp/node_modules/ms/readme.md +51 -0
  537. gaia/eval/webapp/node_modules/negotiator/HISTORY.md +108 -0
  538. gaia/eval/webapp/node_modules/negotiator/LICENSE +24 -0
  539. gaia/eval/webapp/node_modules/negotiator/README.md +203 -0
  540. gaia/eval/webapp/node_modules/negotiator/index.js +82 -0
  541. gaia/eval/webapp/node_modules/negotiator/lib/charset.js +169 -0
  542. gaia/eval/webapp/node_modules/negotiator/lib/encoding.js +184 -0
  543. gaia/eval/webapp/node_modules/negotiator/lib/language.js +179 -0
  544. gaia/eval/webapp/node_modules/negotiator/lib/mediaType.js +294 -0
  545. gaia/eval/webapp/node_modules/negotiator/package.json +42 -0
  546. gaia/eval/webapp/node_modules/object-inspect/.eslintrc +53 -0
  547. gaia/eval/webapp/node_modules/object-inspect/.github/FUNDING.yml +12 -0
  548. gaia/eval/webapp/node_modules/object-inspect/.nycrc +13 -0
  549. gaia/eval/webapp/node_modules/object-inspect/CHANGELOG.md +424 -0
  550. gaia/eval/webapp/node_modules/object-inspect/LICENSE +21 -0
  551. gaia/eval/webapp/node_modules/object-inspect/example/all.js +23 -0
  552. gaia/eval/webapp/node_modules/object-inspect/example/circular.js +6 -0
  553. gaia/eval/webapp/node_modules/object-inspect/example/fn.js +5 -0
  554. gaia/eval/webapp/node_modules/object-inspect/example/inspect.js +10 -0
  555. gaia/eval/webapp/node_modules/object-inspect/index.js +544 -0
  556. gaia/eval/webapp/node_modules/object-inspect/package-support.json +20 -0
  557. gaia/eval/webapp/node_modules/object-inspect/package.json +105 -0
  558. gaia/eval/webapp/node_modules/object-inspect/readme.markdown +84 -0
  559. gaia/eval/webapp/node_modules/object-inspect/test/bigint.js +58 -0
  560. gaia/eval/webapp/node_modules/object-inspect/test/browser/dom.js +15 -0
  561. gaia/eval/webapp/node_modules/object-inspect/test/circular.js +16 -0
  562. gaia/eval/webapp/node_modules/object-inspect/test/deep.js +12 -0
  563. gaia/eval/webapp/node_modules/object-inspect/test/element.js +53 -0
  564. gaia/eval/webapp/node_modules/object-inspect/test/err.js +48 -0
  565. gaia/eval/webapp/node_modules/object-inspect/test/fakes.js +29 -0
  566. gaia/eval/webapp/node_modules/object-inspect/test/fn.js +76 -0
  567. gaia/eval/webapp/node_modules/object-inspect/test/global.js +17 -0
  568. gaia/eval/webapp/node_modules/object-inspect/test/has.js +15 -0
  569. gaia/eval/webapp/node_modules/object-inspect/test/holes.js +15 -0
  570. gaia/eval/webapp/node_modules/object-inspect/test/indent-option.js +271 -0
  571. gaia/eval/webapp/node_modules/object-inspect/test/inspect.js +139 -0
  572. gaia/eval/webapp/node_modules/object-inspect/test/lowbyte.js +12 -0
  573. gaia/eval/webapp/node_modules/object-inspect/test/number.js +58 -0
  574. gaia/eval/webapp/node_modules/object-inspect/test/quoteStyle.js +26 -0
  575. gaia/eval/webapp/node_modules/object-inspect/test/toStringTag.js +40 -0
  576. gaia/eval/webapp/node_modules/object-inspect/test/undef.js +12 -0
  577. gaia/eval/webapp/node_modules/object-inspect/test/values.js +261 -0
  578. gaia/eval/webapp/node_modules/object-inspect/test-core-js.js +26 -0
  579. gaia/eval/webapp/node_modules/object-inspect/util.inspect.js +1 -0
  580. gaia/eval/webapp/node_modules/on-finished/HISTORY.md +98 -0
  581. gaia/eval/webapp/node_modules/on-finished/LICENSE +23 -0
  582. gaia/eval/webapp/node_modules/on-finished/README.md +162 -0
  583. gaia/eval/webapp/node_modules/on-finished/index.js +234 -0
  584. gaia/eval/webapp/node_modules/on-finished/package.json +39 -0
  585. gaia/eval/webapp/node_modules/parseurl/HISTORY.md +58 -0
  586. gaia/eval/webapp/node_modules/parseurl/LICENSE +24 -0
  587. gaia/eval/webapp/node_modules/parseurl/README.md +133 -0
  588. gaia/eval/webapp/node_modules/parseurl/index.js +158 -0
  589. gaia/eval/webapp/node_modules/parseurl/package.json +40 -0
  590. gaia/eval/webapp/node_modules/path/.npmignore +1 -0
  591. gaia/eval/webapp/node_modules/path/LICENSE +18 -0
  592. gaia/eval/webapp/node_modules/path/README.md +15 -0
  593. gaia/eval/webapp/node_modules/path/package.json +24 -0
  594. gaia/eval/webapp/node_modules/path/path.js +628 -0
  595. gaia/eval/webapp/node_modules/path-to-regexp/LICENSE +21 -0
  596. gaia/eval/webapp/node_modules/path-to-regexp/Readme.md +35 -0
  597. gaia/eval/webapp/node_modules/path-to-regexp/index.js +156 -0
  598. gaia/eval/webapp/node_modules/path-to-regexp/package.json +30 -0
  599. gaia/eval/webapp/node_modules/process/.eslintrc +21 -0
  600. gaia/eval/webapp/node_modules/process/LICENSE +22 -0
  601. gaia/eval/webapp/node_modules/process/README.md +26 -0
  602. gaia/eval/webapp/node_modules/process/browser.js +184 -0
  603. gaia/eval/webapp/node_modules/process/index.js +2 -0
  604. gaia/eval/webapp/node_modules/process/package.json +27 -0
  605. gaia/eval/webapp/node_modules/process/test.js +199 -0
  606. gaia/eval/webapp/node_modules/proxy-addr/HISTORY.md +161 -0
  607. gaia/eval/webapp/node_modules/proxy-addr/LICENSE +22 -0
  608. gaia/eval/webapp/node_modules/proxy-addr/README.md +139 -0
  609. gaia/eval/webapp/node_modules/proxy-addr/index.js +327 -0
  610. gaia/eval/webapp/node_modules/proxy-addr/package.json +47 -0
  611. gaia/eval/webapp/node_modules/qs/.editorconfig +46 -0
  612. gaia/eval/webapp/node_modules/qs/.eslintrc +38 -0
  613. gaia/eval/webapp/node_modules/qs/.github/FUNDING.yml +12 -0
  614. gaia/eval/webapp/node_modules/qs/.nycrc +13 -0
  615. gaia/eval/webapp/node_modules/qs/CHANGELOG.md +600 -0
  616. gaia/eval/webapp/node_modules/qs/LICENSE.md +29 -0
  617. gaia/eval/webapp/node_modules/qs/README.md +709 -0
  618. gaia/eval/webapp/node_modules/qs/dist/qs.js +90 -0
  619. gaia/eval/webapp/node_modules/qs/lib/formats.js +23 -0
  620. gaia/eval/webapp/node_modules/qs/lib/index.js +11 -0
  621. gaia/eval/webapp/node_modules/qs/lib/parse.js +296 -0
  622. gaia/eval/webapp/node_modules/qs/lib/stringify.js +351 -0
  623. gaia/eval/webapp/node_modules/qs/lib/utils.js +265 -0
  624. gaia/eval/webapp/node_modules/qs/package.json +91 -0
  625. gaia/eval/webapp/node_modules/qs/test/empty-keys-cases.js +267 -0
  626. gaia/eval/webapp/node_modules/qs/test/parse.js +1170 -0
  627. gaia/eval/webapp/node_modules/qs/test/stringify.js +1298 -0
  628. gaia/eval/webapp/node_modules/qs/test/utils.js +136 -0
  629. gaia/eval/webapp/node_modules/range-parser/HISTORY.md +56 -0
  630. gaia/eval/webapp/node_modules/range-parser/LICENSE +23 -0
  631. gaia/eval/webapp/node_modules/range-parser/README.md +84 -0
  632. gaia/eval/webapp/node_modules/range-parser/index.js +162 -0
  633. gaia/eval/webapp/node_modules/range-parser/package.json +44 -0
  634. gaia/eval/webapp/node_modules/raw-body/HISTORY.md +308 -0
  635. gaia/eval/webapp/node_modules/raw-body/LICENSE +22 -0
  636. gaia/eval/webapp/node_modules/raw-body/README.md +223 -0
  637. gaia/eval/webapp/node_modules/raw-body/SECURITY.md +24 -0
  638. gaia/eval/webapp/node_modules/raw-body/index.d.ts +87 -0
  639. gaia/eval/webapp/node_modules/raw-body/index.js +336 -0
  640. gaia/eval/webapp/node_modules/raw-body/package.json +49 -0
  641. gaia/eval/webapp/node_modules/safe-buffer/LICENSE +21 -0
  642. gaia/eval/webapp/node_modules/safe-buffer/README.md +584 -0
  643. gaia/eval/webapp/node_modules/safe-buffer/index.d.ts +187 -0
  644. gaia/eval/webapp/node_modules/safe-buffer/index.js +65 -0
  645. gaia/eval/webapp/node_modules/safe-buffer/package.json +51 -0
  646. gaia/eval/webapp/node_modules/safer-buffer/LICENSE +21 -0
  647. gaia/eval/webapp/node_modules/safer-buffer/Porting-Buffer.md +268 -0
  648. gaia/eval/webapp/node_modules/safer-buffer/Readme.md +156 -0
  649. gaia/eval/webapp/node_modules/safer-buffer/dangerous.js +58 -0
  650. gaia/eval/webapp/node_modules/safer-buffer/package.json +34 -0
  651. gaia/eval/webapp/node_modules/safer-buffer/safer.js +77 -0
  652. gaia/eval/webapp/node_modules/safer-buffer/tests.js +406 -0
  653. gaia/eval/webapp/node_modules/send/HISTORY.md +526 -0
  654. gaia/eval/webapp/node_modules/send/LICENSE +23 -0
  655. gaia/eval/webapp/node_modules/send/README.md +327 -0
  656. gaia/eval/webapp/node_modules/send/SECURITY.md +24 -0
  657. gaia/eval/webapp/node_modules/send/index.js +1142 -0
  658. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/HISTORY.md +14 -0
  659. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/LICENSE +22 -0
  660. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/README.md +128 -0
  661. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/index.js +60 -0
  662. gaia/eval/webapp/node_modules/send/node_modules/encodeurl/package.json +40 -0
  663. gaia/eval/webapp/node_modules/send/node_modules/ms/index.js +162 -0
  664. gaia/eval/webapp/node_modules/send/node_modules/ms/license.md +21 -0
  665. gaia/eval/webapp/node_modules/send/node_modules/ms/package.json +38 -0
  666. gaia/eval/webapp/node_modules/send/node_modules/ms/readme.md +59 -0
  667. gaia/eval/webapp/node_modules/send/package.json +62 -0
  668. gaia/eval/webapp/node_modules/serve-static/HISTORY.md +487 -0
  669. gaia/eval/webapp/node_modules/serve-static/LICENSE +25 -0
  670. gaia/eval/webapp/node_modules/serve-static/README.md +257 -0
  671. gaia/eval/webapp/node_modules/serve-static/index.js +209 -0
  672. gaia/eval/webapp/node_modules/serve-static/package.json +42 -0
  673. gaia/eval/webapp/node_modules/setprototypeof/LICENSE +13 -0
  674. gaia/eval/webapp/node_modules/setprototypeof/README.md +31 -0
  675. gaia/eval/webapp/node_modules/setprototypeof/index.d.ts +2 -0
  676. gaia/eval/webapp/node_modules/setprototypeof/index.js +17 -0
  677. gaia/eval/webapp/node_modules/setprototypeof/package.json +38 -0
  678. gaia/eval/webapp/node_modules/setprototypeof/test/index.js +24 -0
  679. gaia/eval/webapp/node_modules/side-channel/.editorconfig +9 -0
  680. gaia/eval/webapp/node_modules/side-channel/.eslintrc +12 -0
  681. gaia/eval/webapp/node_modules/side-channel/.github/FUNDING.yml +12 -0
  682. gaia/eval/webapp/node_modules/side-channel/.nycrc +13 -0
  683. gaia/eval/webapp/node_modules/side-channel/CHANGELOG.md +110 -0
  684. gaia/eval/webapp/node_modules/side-channel/LICENSE +21 -0
  685. gaia/eval/webapp/node_modules/side-channel/README.md +61 -0
  686. gaia/eval/webapp/node_modules/side-channel/index.d.ts +14 -0
  687. gaia/eval/webapp/node_modules/side-channel/index.js +43 -0
  688. gaia/eval/webapp/node_modules/side-channel/package.json +85 -0
  689. gaia/eval/webapp/node_modules/side-channel/test/index.js +104 -0
  690. gaia/eval/webapp/node_modules/side-channel/tsconfig.json +9 -0
  691. gaia/eval/webapp/node_modules/side-channel-list/.editorconfig +9 -0
  692. gaia/eval/webapp/node_modules/side-channel-list/.eslintrc +11 -0
  693. gaia/eval/webapp/node_modules/side-channel-list/.github/FUNDING.yml +12 -0
  694. gaia/eval/webapp/node_modules/side-channel-list/.nycrc +13 -0
  695. gaia/eval/webapp/node_modules/side-channel-list/CHANGELOG.md +15 -0
  696. gaia/eval/webapp/node_modules/side-channel-list/LICENSE +21 -0
  697. gaia/eval/webapp/node_modules/side-channel-list/README.md +62 -0
  698. gaia/eval/webapp/node_modules/side-channel-list/index.d.ts +13 -0
  699. gaia/eval/webapp/node_modules/side-channel-list/index.js +113 -0
  700. gaia/eval/webapp/node_modules/side-channel-list/list.d.ts +14 -0
  701. gaia/eval/webapp/node_modules/side-channel-list/package.json +77 -0
  702. gaia/eval/webapp/node_modules/side-channel-list/test/index.js +104 -0
  703. gaia/eval/webapp/node_modules/side-channel-list/tsconfig.json +9 -0
  704. gaia/eval/webapp/node_modules/side-channel-map/.editorconfig +9 -0
  705. gaia/eval/webapp/node_modules/side-channel-map/.eslintrc +11 -0
  706. gaia/eval/webapp/node_modules/side-channel-map/.github/FUNDING.yml +12 -0
  707. gaia/eval/webapp/node_modules/side-channel-map/.nycrc +13 -0
  708. gaia/eval/webapp/node_modules/side-channel-map/CHANGELOG.md +22 -0
  709. gaia/eval/webapp/node_modules/side-channel-map/LICENSE +21 -0
  710. gaia/eval/webapp/node_modules/side-channel-map/README.md +62 -0
  711. gaia/eval/webapp/node_modules/side-channel-map/index.d.ts +15 -0
  712. gaia/eval/webapp/node_modules/side-channel-map/index.js +68 -0
  713. gaia/eval/webapp/node_modules/side-channel-map/package.json +80 -0
  714. gaia/eval/webapp/node_modules/side-channel-map/test/index.js +114 -0
  715. gaia/eval/webapp/node_modules/side-channel-map/tsconfig.json +9 -0
  716. gaia/eval/webapp/node_modules/side-channel-weakmap/.editorconfig +9 -0
  717. gaia/eval/webapp/node_modules/side-channel-weakmap/.eslintrc +12 -0
  718. gaia/eval/webapp/node_modules/side-channel-weakmap/.github/FUNDING.yml +12 -0
  719. gaia/eval/webapp/node_modules/side-channel-weakmap/.nycrc +13 -0
  720. gaia/eval/webapp/node_modules/side-channel-weakmap/CHANGELOG.md +28 -0
  721. gaia/eval/webapp/node_modules/side-channel-weakmap/LICENSE +21 -0
  722. gaia/eval/webapp/node_modules/side-channel-weakmap/README.md +62 -0
  723. gaia/eval/webapp/node_modules/side-channel-weakmap/index.d.ts +15 -0
  724. gaia/eval/webapp/node_modules/side-channel-weakmap/index.js +84 -0
  725. gaia/eval/webapp/node_modules/side-channel-weakmap/package.json +87 -0
  726. gaia/eval/webapp/node_modules/side-channel-weakmap/test/index.js +114 -0
  727. gaia/eval/webapp/node_modules/side-channel-weakmap/tsconfig.json +9 -0
  728. gaia/eval/webapp/node_modules/statuses/HISTORY.md +82 -0
  729. gaia/eval/webapp/node_modules/statuses/LICENSE +23 -0
  730. gaia/eval/webapp/node_modules/statuses/README.md +136 -0
  731. gaia/eval/webapp/node_modules/statuses/codes.json +65 -0
  732. gaia/eval/webapp/node_modules/statuses/index.js +146 -0
  733. gaia/eval/webapp/node_modules/statuses/package.json +49 -0
  734. gaia/eval/webapp/node_modules/toidentifier/HISTORY.md +9 -0
  735. gaia/eval/webapp/node_modules/toidentifier/LICENSE +21 -0
  736. gaia/eval/webapp/node_modules/toidentifier/README.md +61 -0
  737. gaia/eval/webapp/node_modules/toidentifier/index.js +32 -0
  738. gaia/eval/webapp/node_modules/toidentifier/package.json +38 -0
  739. gaia/eval/webapp/node_modules/type-is/HISTORY.md +259 -0
  740. gaia/eval/webapp/node_modules/type-is/LICENSE +23 -0
  741. gaia/eval/webapp/node_modules/type-is/README.md +170 -0
  742. gaia/eval/webapp/node_modules/type-is/index.js +266 -0
  743. gaia/eval/webapp/node_modules/type-is/package.json +45 -0
  744. gaia/eval/webapp/node_modules/unpipe/HISTORY.md +4 -0
  745. gaia/eval/webapp/node_modules/unpipe/LICENSE +22 -0
  746. gaia/eval/webapp/node_modules/unpipe/README.md +43 -0
  747. gaia/eval/webapp/node_modules/unpipe/index.js +69 -0
  748. gaia/eval/webapp/node_modules/unpipe/package.json +27 -0
  749. gaia/eval/webapp/node_modules/util/LICENSE +18 -0
  750. gaia/eval/webapp/node_modules/util/README.md +15 -0
  751. gaia/eval/webapp/node_modules/util/node_modules/inherits/LICENSE +16 -0
  752. gaia/eval/webapp/node_modules/util/node_modules/inherits/README.md +42 -0
  753. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits.js +7 -0
  754. gaia/eval/webapp/node_modules/util/node_modules/inherits/inherits_browser.js +23 -0
  755. gaia/eval/webapp/node_modules/util/node_modules/inherits/package.json +29 -0
  756. gaia/eval/webapp/node_modules/util/package.json +35 -0
  757. gaia/eval/webapp/node_modules/util/support/isBuffer.js +3 -0
  758. gaia/eval/webapp/node_modules/util/support/isBufferBrowser.js +6 -0
  759. gaia/eval/webapp/node_modules/util/util.js +586 -0
  760. gaia/eval/webapp/node_modules/utils-merge/.npmignore +9 -0
  761. gaia/eval/webapp/node_modules/utils-merge/LICENSE +20 -0
  762. gaia/eval/webapp/node_modules/utils-merge/README.md +34 -0
  763. gaia/eval/webapp/node_modules/utils-merge/index.js +23 -0
  764. gaia/eval/webapp/node_modules/utils-merge/package.json +40 -0
  765. gaia/eval/webapp/node_modules/vary/HISTORY.md +39 -0
  766. gaia/eval/webapp/node_modules/vary/LICENSE +22 -0
  767. gaia/eval/webapp/node_modules/vary/README.md +101 -0
  768. gaia/eval/webapp/node_modules/vary/index.js +149 -0
  769. gaia/eval/webapp/node_modules/vary/package.json +43 -0
  770. gaia/eval/webapp/package-lock.json +875 -0
  771. gaia/eval/webapp/package.json +21 -0
  772. gaia/eval/webapp/public/app.js +3403 -0
  773. gaia/eval/webapp/public/index.html +88 -0
  774. gaia/eval/webapp/public/styles.css +3661 -0
  775. gaia/eval/webapp/server.js +416 -0
  776. gaia/eval/webapp/test-setup.js +73 -0
  777. gaia/llm/__init__.py +2 -0
  778. gaia/llm/lemonade_client.py +3083 -0
  779. gaia/llm/lemonade_manager.py +269 -0
  780. gaia/llm/llm_client.py +729 -0
  781. gaia/llm/vlm_client.py +307 -0
  782. gaia/logger.py +189 -0
  783. gaia/mcp/agent_mcp_server.py +245 -0
  784. gaia/mcp/blender_mcp_client.py +138 -0
  785. gaia/mcp/blender_mcp_server.py +648 -0
  786. gaia/mcp/context7_cache.py +332 -0
  787. gaia/mcp/external_services.py +518 -0
  788. gaia/mcp/mcp_bridge.py +550 -0
  789. gaia/mcp/servers/__init__.py +6 -0
  790. gaia/mcp/servers/docker_mcp.py +83 -0
  791. gaia/rag/__init__.py +10 -0
  792. gaia/rag/app.py +293 -0
  793. gaia/rag/demo.py +304 -0
  794. gaia/rag/pdf_utils.py +235 -0
  795. gaia/rag/sdk.py +2194 -0
  796. gaia/security.py +163 -0
  797. gaia/talk/app.py +289 -0
  798. gaia/talk/sdk.py +538 -0
  799. gaia/util.py +46 -0
  800. gaia/version.py +100 -0
gaia/eval/batch_experiment.py
@@ -0,0 +1,2332 @@
1
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ import json
5
+ import re
6
+ import time
7
+ from dataclasses import dataclass
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Tuple
11
+
12
+ import numpy as np
13
+
14
+ from gaia.chat.prompts import Prompts
15
+ from gaia.eval.claude import ClaudeClient
16
+ from gaia.eval.config import DEFAULT_CLAUDE_MODEL
17
+ from gaia.llm.lemonade_client import LemonadeClient
18
+ from gaia.logger import get_logger
19
+
20
+ # Import PDF reader
21
+ try:
22
+ from pypdf import PdfReader
23
+ except ImportError:
24
+ PdfReader = None
25
+
26
+ # Experiment configuration constants
27
+ CREATIVE_TEMPERATURE_MAX = 0.7
28
+ CREATIVE_TEMPERATURE_INCREMENT = 0.3
29
+
30
+
31
+ def should_use_chat_template(task_type: str) -> bool:
32
+ """
33
+ Determine if chat template formatting should be used for a given task.
34
+
35
+ Args:
36
+ task_type: Type of task ('chat', 'qa', 'summarization', etc.)
37
+
38
+ Returns:
39
+ True if chat template should be used, False otherwise
40
+ """
41
+ # Only use chat templates for actual conversation/QA tasks
42
+ # NOT for completion tasks like summarization
43
+ return task_type in ["chat", "qa"]
44
+
45
+
46
+ def format_prompt_with_template(
47
+ model: str,
48
+ system_prompt: str,
49
+ user_content: str,
50
+ document_content: str = "",
51
+ use_chat_template: bool = False,
52
+ ) -> str:
53
+ """
54
+ Format prompt using model-specific template from Prompts class, or simple format.
55
+
56
+ Args:
57
+ model: Model name/path
58
+ system_prompt: The system instruction
59
+ user_content: The user's query/content
60
+ document_content: Optional document/transcript context to include
61
+ use_chat_template: Whether to use chat template formatting (for QA/chat tasks)
62
+
63
+ Returns:
64
+ Formatted prompt
65
+ """
66
+ if use_chat_template:
67
+ # Use ChatML formatting for chat/QA tasks
68
+ if document_content:
69
+ enhanced_system_prompt = (
70
+ f"{system_prompt}\n\nContext Document:\n{document_content}"
71
+ )
72
+ else:
73
+ enhanced_system_prompt = system_prompt
74
+
75
+ # Convert to chat history format expected by Prompts.format_chat_history
76
+ chat_history = [f"user: {user_content}"]
77
+ return Prompts.format_chat_history(
78
+ model=model, chat_history=chat_history, system_prompt=enhanced_system_prompt
79
+ )
80
+ else:
81
+ # Simple format for completion tasks (like summarization)
82
+ if document_content:
83
+ return f"{system_prompt}\n\nDocument Content:\n{document_content}\n\n{user_content}"
84
+ else:
85
+ return f"{system_prompt}\n\n{user_content}"
86
+
87
+
88
+ def extract_thinking_from_response(response_text: str) -> Dict[str, Any]:
89
+ """
90
+ Extract thinking content from <think>...</think> tags if present.
91
+
92
+ Args:
93
+ response_text: The raw model response
94
+
95
+ Returns:
96
+ Dict with 'response' (final answer) and 'thinking' (reasoning process, or None)
97
+ """
98
+ if not response_text:
99
+ return {"response": "", "thinking": None}
100
+
101
+ # Check for thinking tags
102
+ think_pattern = r"<think>(.*?)</think>"
103
+ match = re.search(think_pattern, response_text, flags=re.DOTALL)
104
+
105
+ if match:
106
+ thinking = match.group(1).strip()
107
+ # Extract everything after the </think> tag as the final response
108
+ final_response = re.sub(
109
+ think_pattern, "", response_text, flags=re.DOTALL
110
+ ).strip()
111
+ return {"response": final_response, "thinking": thinking}
112
+
113
+ # No thinking tags found, return full response
114
+ return {"response": response_text.strip(), "thinking": None}
115
+
116
+
117
+ @dataclass
118
+ class ExperimentConfig:
119
+ """Configuration for a single experiment."""
120
+
121
+ name: str
122
+ llm_type: str # "claude" or "lemonade"
123
+ model: str
124
+ system_prompt: str
125
+ experiment_type: str = "qa" # "qa" or "summarization"
126
+ max_tokens: int = 512
127
+ temperature: float = 0.7
128
+ parameters: Dict[str, Any] = None
129
+
130
+ def __post_init__(self):
131
+ if self.parameters is None:
132
+ self.parameters = {}
133
+ if self.experiment_type not in ["qa", "summarization"]:
134
+ raise ValueError(
135
+ f"experiment_type must be 'qa' or 'summarization', got: {self.experiment_type}"
136
+ )
137
+
138
+
139
+ class BatchExperimentRunner:
140
+ """Run batch experiments with different LLM configurations on transcript data.
141
+
142
+ Summarization experiments make independent LLM calls for each component
143
+ (executive summary, detailed summary, action items, etc.) to produce
144
+ natural, focused outputs without complex JSON formatting.
145
+ """
146
+
147
+ def __init__(self, config_file: str):
148
+ self.log = get_logger(__name__)
149
+ self.config_file = config_file
150
+ self.experiments = []
151
+ self.load_config()
152
+
153
+    def _extract_text_from_pdf(self, pdf_path: str) -> str:
+        """Extract text from PDF file using local PDF library."""
+        if PdfReader is None:
+            raise ImportError(
+                "PDF reading library not found. Please install pypdf:\n"
+                " pip install pypdf"
+            )
+
+        try:
+            reader = PdfReader(pdf_path)
+            total_pages = len(reader.pages)
+            self.log.info(
+                f"📄 Extracting text from {total_pages} pages of {pdf_path}..."
+            )
+
+            text = ""
+            for i, page in enumerate(reader.pages, 1):
+                # Show progress for large PDFs
+                if i % 10 == 0 or i == total_pages:
+                    self.log.debug(f" Processing page {i}/{total_pages}...")
+                page_text = page.extract_text()
+                if page_text:
+                    text += page_text + "\n"
+
+            extracted_text = text.strip()
+            self.log.info(f"📝 Extracted {len(extracted_text):,} characters from PDF")
+            return extracted_text
+
+        except Exception as e:
+            self.log.error(f"Error reading PDF {pdf_path}: {e}")
+            raise
+
+    def load_config(self):
+        """Load experiment configuration from JSON file."""
+        try:
+            with open(self.config_file, "r", encoding="utf-8") as f:
+                config_data = json.load(f)
+
+            # Validate config structure
+            if "experiments" not in config_data:
+                raise ValueError("Configuration file must contain 'experiments' array")
+
+            # Parse experiments
+            for exp_data in config_data["experiments"]:
+                experiment = ExperimentConfig(
+                    name=exp_data["name"],
+                    llm_type=exp_data["llm_type"],
+                    model=exp_data["model"],
+                    system_prompt=exp_data["system_prompt"],
+                    experiment_type=exp_data.get("experiment_type", "qa"),
+                    max_tokens=exp_data.get("max_tokens", 512),
+                    temperature=exp_data.get("temperature", 0.7),
+                    parameters=exp_data.get("parameters", {}),
+                )
+                self.experiments.append(experiment)
+
+            self.log.info(f"Loaded {len(self.experiments)} experiments from config")
+
+        except Exception as e:
+            self.log.error(f"Error loading config file: {e}")
+            raise
+
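# A sketch of a config file that load_config would accept, based only on the keys
# parsed above; the model name and prompt are placeholders, not values shipped
# with this package:
#
#     {
#       "experiments": [
#         {
#           "name": "lemonade_qa_baseline",
#           "llm_type": "lemonade",
#           "model": "some-local-model",
#           "system_prompt": "Answer questions about the transcript.",
#           "experiment_type": "qa",
#           "max_tokens": 512,
#           "temperature": 0.7,
#           "parameters": {}
#         }
#       ]
#     }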
+    def create_llm_client(self, experiment: ExperimentConfig):
+        """Create appropriate LLM client based on experiment config."""
+        if experiment.llm_type.lower() == "claude":
+            return ClaudeClient(
+                model=experiment.model, max_tokens=experiment.max_tokens
+            )
+        elif experiment.llm_type.lower() == "lemonade":
+            # Filter out non-LLM client parameters before passing to client constructor
+            # Parameters like 'stop', 'combined_prompt' are for completions API, not client init
+            llm_params = {
+                k: v
+                for k, v in experiment.parameters.items()
+                if k not in ["combined_prompt", "stop"]
+            }
+            return LemonadeClient(model=experiment.model, verbose=False, **llm_params)
+        else:
+            raise ValueError(f"Unsupported LLM type: {experiment.llm_type}")
+
+    def process_question_claude(
+        self,
+        client: ClaudeClient,
+        question: str,
+        system_prompt: str,
+        document_content: str = "",
+    ) -> Dict:
+        """Process a question using Claude client."""
+        try:
+            if document_content:
+                # Include document content in the prompt
+                prompt = f"{system_prompt}\n\nDocument Content:\n{document_content}\n\nQuestion: {question}\n\nAnswer:"
+            else:
+                prompt = f"{system_prompt}\n\nQuestion: {question}\n\nAnswer:"
+            response_data = client.get_completion_with_usage(prompt)
+
+            # Extract response text
+            response = response_data["content"]
+            if isinstance(response, list):
+                response_text = (
+                    response[0].text
+                    if hasattr(response[0], "text")
+                    else str(response[0])
+                )
+            else:
+                response_text = (
+                    response.text if hasattr(response, "text") else str(response)
+                )
+
+            return {
+                "response": response_text.strip(),
+                "usage": response_data["usage"],
+                "cost": response_data["cost"],
+                "error": None,
+            }
+        except Exception as e:
+            self.log.error(f"Error processing question with Claude: {e}")
+            return {
+                "response": f"ERROR: {str(e)}",
+                "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
+                "error": str(e),
+            }
+
+    def process_question_lemonade(
+        self,
+        client: LemonadeClient,
+        question: str,
+        system_prompt: str,
+        max_tokens: int,
+        temperature: float,
+        document_content: str = "",
+    ) -> Dict:
+        """Process a question using Lemonade client."""
+        try:
+            # Format prompt for QA (uses ChatML for supported models)
+            # Document content is passed separately and added to system prompt
+            formatted_prompt = format_prompt_with_template(
+                model=client.model,
+                system_prompt=system_prompt,
+                user_content=question,
+                document_content=document_content,
+                use_chat_template=True,  # Use chat template for QA tasks
+            )
+
+            # Use completions method with the client's loaded model
+            response_data = client.completions(
+                model=client.model,  # Use model from experiment config
+                prompt=formatted_prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stream=False,
+            )
+
+            # Extract text from the response
+            response_text = ""
+            if "choices" in response_data and response_data["choices"]:
+                response_text = response_data["choices"][0].get("text", "")
+
+            # Extract thinking tokens if present
+            extracted = extract_thinking_from_response(response_text)
+
+            # Get token statistics from Lemonade
+            try:
+                stats = client.get_stats()
+                input_tokens = stats.get("input_tokens", 0) if stats else 0
+                output_tokens = stats.get("output_tokens", 0) if stats else 0
+                total_tokens = input_tokens + output_tokens
+            except Exception as e:
+                self.log.warning(f"Failed to get stats from Lemonade: {e}")
+                input_tokens = output_tokens = total_tokens = 0
+
+            result = {
+                "response": extracted["response"],
+                "usage": {
+                    "input_tokens": input_tokens,
+                    "output_tokens": output_tokens,
+                    "total_tokens": total_tokens,
+                },
+                "cost": {
+                    "input_cost": 0.0,
+                    "output_cost": 0.0,
+                    "total_cost": 0.0,
+                },  # Local inference has no cost
+                "error": None,
+            }
+
+            # Add thinking tokens if present
+            if extracted["thinking"]:
+                result["thinking"] = extracted["thinking"]
+
+            return result
+
+        except Exception as e:
+            self.log.error(f"Error processing question with Lemonade: {e}")
+            return {
+                "response": f"ERROR: {str(e)}",
+                "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
+                "error": str(e),
+            }
+
+    def _get_summary_prompts(self) -> Dict[str, str]:
+        """Generate individual user prompts for each summary component.
+
+        Note: System prompt is passed separately when formatting with chat template.
+        These are just the user-facing questions/tasks.
+        """
+        return {
+            "executive_summary": "Provide a brief executive summary (2-3 sentences) of the key outcomes and decisions from this transcript.",
+            "detailed_summary": "Provide a detailed summary of the transcript, covering all major topics, discussions, and outcomes in paragraph form.",
+            "action_items": "List the specific action items that were assigned during this meeting. Include who is responsible for each item when mentioned. Provide as a simple list.",
+            "key_decisions": "List the key decisions that were made during this meeting. Focus on concrete decisions and outcomes. Provide as a simple list.",
+            "participants": "List the participants mentioned in this transcript. Include their roles or titles when available. Provide as a simple list.",
+            "topics_discussed": "List the main topics and subjects that were discussed in this meeting. Provide as a simple list.",
+        }
+
+    def process_summarization_claude(
+        self,
+        client: ClaudeClient,
+        transcript: str,
+        system_prompt: str,
+        combined_prompt: bool = False,
+    ) -> Dict:
+        """Process summarization by making independent or combined calls for each component."""
+        try:
+            summary_prompts = self._get_summary_prompts()
+            results = {}
+            total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+            total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
+            errors = []
+
+            if combined_prompt:
+                # Make a single call with all components
+                self.log.info(
+                    f"Summarizing transcript with 1 combined model call for: {', '.join(summary_prompts.keys())}"
+                )
+
+                # Build combined prompt
+                combined_request = f"{system_prompt}\n\nPlease provide the following summaries for the transcript:\n\n"
+                for component, user_prompt in summary_prompts.items():
+                    combined_request += f"**{component.upper()}**:\n{user_prompt}\n\n"
+                combined_request += f"\nTranscript:\n{transcript}\n\nPlease structure your response with clear headers for each section."
+
+                response_data = client.get_completion_with_usage(combined_request)
+
+                # Extract response text
+                response = response_data["content"]
+                if isinstance(response, list):
+                    response_text = (
+                        response[0].text
+                        if hasattr(response[0], "text")
+                        else str(response[0])
+                    )
+                else:
+                    response_text = (
+                        response.text if hasattr(response, "text") else str(response)
+                    )
+
+                # Parse response into components
+                for component in summary_prompts.keys():
+                    # Try to extract each component from the combined response
+                    component_upper = component.upper()
+                    start_markers = [
+                        f"**{component_upper}**:",
+                        f"{component_upper}:",
+                        f"# {component_upper}",
+                        f"## {component_upper}",
+                    ]
+
+                    section_text = ""
+                    for marker in start_markers:
+                        if marker in response_text:
+                            start_idx = response_text.find(marker) + len(marker)
+                            # Find the next section or end
+                            end_idx = len(response_text)
+                            for other_component in summary_prompts.keys():
+                                if other_component == component:
+                                    continue
+                                other_upper = other_component.upper()
+                                for other_marker in [
+                                    f"**{other_upper}**:",
+                                    f"{other_upper}:",
+                                    f"# {other_upper}",
+                                    f"## {other_upper}",
+                                ]:
+                                    idx = response_text.find(other_marker, start_idx)
+                                    if idx != -1 and idx < end_idx:
+                                        end_idx = idx
+                            section_text = response_text[start_idx:end_idx].strip()
+                            break
+
+                    results[component] = (
+                        section_text if section_text else response_text.strip()
+                    )
+
+                # Use combined usage and cost
+                if response_data["usage"]:
+                    total_usage = response_data["usage"]
+                if response_data["cost"]:
+                    total_cost = response_data["cost"]
+
+            else:
+                # Original behavior: independent calls
+                self.log.info(
+                    f"Summarizing transcript with {len(summary_prompts)} independent model calls: {', '.join(summary_prompts.keys())}"
+                )
+
+                for component, user_prompt in summary_prompts.items():
+                    try:
+                        # Create full prompt with system prompt, user prompt, and transcript
+                        full_prompt = f"{system_prompt}\n\n{user_prompt}\n\nTranscript:\n{transcript}\n\nResponse:"
+
+                        response_data = client.get_completion_with_usage(full_prompt)
+
+                        # Extract response text
+                        response = response_data["content"]
+                        if isinstance(response, list):
+                            response_text = (
+                                response[0].text
+                                if hasattr(response[0], "text")
+                                else str(response[0])
+                            )
+                        else:
+                            response_text = (
+                                response.text
+                                if hasattr(response, "text")
+                                else str(response)
+                            )
+
+                        results[component] = response_text.strip()
+
+                        # Accumulate usage and cost
+                        if response_data["usage"]:
+                            for key in total_usage:
+                                total_usage[key] += response_data["usage"].get(key, 0)
+                        if response_data["cost"]:
+                            for key in total_cost:
+                                total_cost[key] += response_data["cost"].get(key, 0.0)
+
+                        # Small delay between component calls to avoid rate limiting
+                        time.sleep(0.5)
+
+                    except Exception as e:
+                        self.log.error(f"Error processing {component} with Claude: {e}")
+                        results[component] = f"ERROR: {str(e)}"
+                        errors.append(f"{component}: {str(e)}")
+
+            return {
+                "response": results,
+                "usage": total_usage,
+                "cost": total_cost,
+                "error": "; ".join(errors) if errors else None,
+            }
+        except Exception as e:
+            self.log.error(f"Error in independent summarization with Claude: {e}")
+            return {
+                "response": f"ERROR: {str(e)}",
+                "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
+                "error": str(e),
+            }
+
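# Both combined-prompt paths (above for Claude, below for Lemonade) split a single
# response by section markers such as "**EXECUTIVE_SUMMARY**:". A toy illustration
# of that parsing; the response string is invented for this note:
#
#     response_text = "**EXECUTIVE_SUMMARY**:\nShipped v1.\n**ACTION_ITEMS**:\n- Bob: write docs"
#     # For "executive_summary", start_idx lands just after its marker and end_idx
#     # at the "**ACTION_ITEMS**:" marker, so the extracted section is "Shipped v1.".
#     # Components with no matching marker fall back to the full response text.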
+    def process_summarization_lemonade(
+        self,
+        client: LemonadeClient,
+        transcript: str,
+        system_prompt: str,
+        max_tokens: int,
+        temperature: float,
+        combined_prompt: bool = False,
+        extra_params: Dict[str, Any] = None,
+    ) -> Dict:
+        """Process summarization by making independent or combined calls for each component."""
+        try:
+            summary_prompts = self._get_summary_prompts()
+            results = {}
+            total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+            total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
+            errors = []
+
+            # Prepare extra parameters (like stop sequences)
+            if extra_params is None:
+                extra_params = {}
+
+            if combined_prompt:
+                # Make a single call with all components
+                self.log.info(
+                    f"Summarizing transcript with 1 combined model call for: {', '.join(summary_prompts.keys())}"
+                )
+
+                # Build user request for all components
+                user_request = (
+                    "Please provide the following summaries for the transcript:\n\n"
+                )
+                for component, user_prompt in summary_prompts.items():
+                    user_request += f"**{component.upper()}**:\n{user_prompt}\n\n"
+                user_request += "\nPlease structure your response with clear headers for each section."
+
+                # Format prompt for summarization using chat template
+                formatted_prompt = format_prompt_with_template(
+                    model=client.model,
+                    system_prompt=system_prompt,
+                    user_content=user_request,
+                    document_content=transcript,  # Pass transcript as context
+                    use_chat_template=True,  # Use chat template for summarization
+                )
+
+                response_data = client.completions(
+                    model=client.model,
+                    prompt=formatted_prompt,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    stream=False,
+                    **extra_params,  # Pass stop sequences and other params
+                )
+
+                # Extract text from the response
+                response_text = ""
+                if "choices" in response_data and response_data["choices"]:
+                    response_text = response_data["choices"][0].get("text", "")
+
+                # Extract thinking tokens if present
+                extracted = extract_thinking_from_response(response_text)
+                response_text = extracted["response"]
+                thinking_content = extracted["thinking"]
+
+                # Get token statistics from Lemonade
+                try:
+                    stats = client.get_stats()
+                    input_tokens = stats.get("input_tokens", 0) if stats else 0
+                    output_tokens = stats.get("output_tokens", 0) if stats else 0
+                    total_tokens = input_tokens + output_tokens
+                    total_usage = {
+                        "input_tokens": input_tokens,
+                        "output_tokens": output_tokens,
+                        "total_tokens": total_tokens,
+                    }
+                except Exception as e:
+                    self.log.warning(f"Failed to get stats from Lemonade: {e}")
+                    total_usage = {
+                        "input_tokens": 0,
+                        "output_tokens": 0,
+                        "total_tokens": 0,
+                    }
+
+                # Parse response into components
+                for component in summary_prompts.keys():
+                    # Try to extract each component from the combined response
+                    component_upper = component.upper()
+                    start_markers = [
+                        f"**{component_upper}**:",
+                        f"{component_upper}:",
+                        f"# {component_upper}",
+                        f"## {component_upper}",
+                    ]
+
+                    section_text = ""
+                    for marker in start_markers:
+                        if marker in response_text:
+                            start_idx = response_text.find(marker) + len(marker)
+                            # Find the next section or end
+                            end_idx = len(response_text)
+                            for other_component in summary_prompts.keys():
+                                if other_component == component:
+                                    continue
+                                other_upper = other_component.upper()
+                                for other_marker in [
+                                    f"**{other_upper}**:",
+                                    f"{other_upper}:",
+                                    f"# {other_upper}",
+                                    f"## {other_upper}",
+                                ]:
+                                    idx = response_text.find(other_marker, start_idx)
+                                    if idx != -1 and idx < end_idx:
+                                        end_idx = idx
+                            section_text = response_text[start_idx:end_idx].strip()
+                            break
+
+                    results[component] = (
+                        section_text if section_text else response_text.strip()
+                    )
+
+                # Total usage already calculated above, cost is always 0 for local
+                total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
+
+                # Store thinking content if present (for combined mode)
+                if thinking_content:
+                    results["_thinking"] = thinking_content
+
+            else:
+                # Original behavior: independent calls
+                self.log.info(
+                    f"Summarizing transcript with {len(summary_prompts)} independent model calls: {', '.join(summary_prompts.keys())}"
+                )
+
+                for component, user_prompt in summary_prompts.items():
+                    try:
+                        # Format using chat template with separate system prompt and user prompt
+                        formatted_prompt = format_prompt_with_template(
+                            model=client.model,
+                            system_prompt=system_prompt,
+                            user_content=user_prompt,
+                            document_content=transcript,
+                            use_chat_template=True,  # Use chat template for summarization
+                        )
+
+                        response_data = client.completions(
+                            model=client.model,
+                            prompt=formatted_prompt,
+                            max_tokens=max_tokens,
+                            temperature=temperature,
+                            stream=False,
+                            **extra_params,  # Pass stop sequences and other params
+                        )
+
+                        # Extract text from the response
+                        response_text = ""
+                        if "choices" in response_data and response_data["choices"]:
+                            response_text = response_data["choices"][0].get("text", "")
+
+                        # Extract thinking tokens if present
+                        extracted = extract_thinking_from_response(response_text)
+
+                        results[component] = extracted["response"]
+
+                        # Get token statistics from Lemonade
+                        try:
+                            stats = client.get_stats()
+                            if stats:
+                                total_usage["input_tokens"] += stats.get(
+                                    "input_tokens", 0
+                                )
+                                total_usage["output_tokens"] += stats.get(
+                                    "output_tokens", 0
+                                )
+                                total_usage["total_tokens"] += stats.get(
+                                    "input_tokens", 0
+                                ) + stats.get("output_tokens", 0)
+                        except Exception as e:
+                            self.log.warning(f"Failed to get stats from Lemonade: {e}")
+
+                        # Small delay between component calls to avoid rate limiting
+                        time.sleep(0.5)
+
+                    except Exception as e:
+                        self.log.error(
+                            f"Error processing {component} with Lemonade: {e}"
+                        )
+                        results[component] = f"ERROR: {str(e)}"
+                        errors.append(f"{component}: {str(e)}")
+
+            result_dict = {
+                "response": results,
+                "usage": total_usage,
+                "cost": total_cost,
+                "error": "; ".join(errors) if errors else None,
+            }
+
+            # Add thinking if present (stored with key "_thinking" in results dict)
+            if "_thinking" in results:
+                result_dict["thinking"] = results.pop("_thinking")
+
+            return result_dict
+
+        except Exception as e:
+            self.log.error(f"Error in independent summarization with Lemonade: {e}")
+            return {
+                "response": f"ERROR: {str(e)}",
+                "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
+                "error": str(e),
+            }
+
+    def check_experiment_exists(
+        self, experiment: ExperimentConfig, output_dir: str
+    ) -> bool:
+        """Check if an experiment file already exists in the output directory.
+
+        Args:
+            experiment: The experiment configuration
+            output_dir: The output directory path
+
+        Returns:
+            True if experiment file exists, False otherwise
+        """
+        output_base_path = Path(output_dir)
+
+        # Generate the same safe filename that would be used for the output
+        safe_name = "".join(
+            c if (c.isalnum() or c in (" ", "-", "_")) else "_" if c == "." else ""
+            for c in experiment.name
+        ).rstrip()
+        safe_name = safe_name.replace(" ", "_")
+
+        # Check for consolidated file
+        consolidated_filename = f"{safe_name}.experiment.json"
+        consolidated_path = output_base_path / consolidated_filename
+
+        if consolidated_path.exists():
+            self.log.info(f"Experiment file already exists: {consolidated_path}")
+            return True
+
+        return False
+
+    def load_data_from_source(
+        self, input_path: str, experiment_type: str = "qa", queries_source: str = None
+    ) -> List[Dict]:
+        """Load data from various input sources: groundtruth files, transcript files, or directories."""
+        input_path = Path(input_path)
+
+        self.log.info(f"Loading data from source: {input_path}, type={experiment_type}")
+        self.log.info(
+            f"Input path exists: {input_path.exists()}, is_file: {input_path.is_file()}, suffix: {input_path.suffix}"
+        )
+
+        try:
+            if input_path.is_file():
+                if input_path.suffix == ".json":
+                    # Handle groundtruth JSON files
+                    return self._load_from_groundtruth_file(
+                        str(input_path), experiment_type
+                    )
+                else:
+                    # Handle individual transcript files
+                    return self._load_from_transcript_file(
+                        str(input_path), experiment_type, queries_source
+                    )
+            elif input_path.is_dir():
+                # Handle directories of transcript files
+                return self._load_from_transcript_directory(
+                    str(input_path), experiment_type, queries_source
+                )
+            else:
+                raise FileNotFoundError(f"Input path not found: {input_path}")
+
+        except Exception as e:
+            self.log.error(f"Error loading data from source: {e}")
+            raise
+
+    def _load_queries_from_groundtruth(self, groundtruth_file: str) -> List[str]:
+        """Extract queries from a groundtruth file for use with raw transcripts."""
+        with open(groundtruth_file, "r", encoding="utf-8") as f:
+            groundtruth_data = json.load(f)
+
+        analysis = groundtruth_data.get("analysis", {})
+        qa_pairs = analysis.get("qa_pairs", [])
+
+        if not qa_pairs:
+            raise ValueError(
+                f"No QA pairs found in groundtruth file: {groundtruth_file}"
+            )
+
+        queries = []
+        for qa_pair in qa_pairs:
+            query = qa_pair.get("query", qa_pair.get("question", ""))
+            if query:
+                queries.append(query)
+
+        if not queries:
+            raise ValueError(
+                f"No valid queries found in groundtruth file: {groundtruth_file}"
+            )
+
+        return queries
+
+    def _get_default_queries(self) -> List[str]:
+        """Return default questions for QA experiments on raw transcripts."""
+        return [
+            "What were the main topics discussed in this meeting?",
+            "What action items were assigned and to whom?",
+            "What decisions were made during this meeting?",
+            "Who participated in this meeting and what were their roles?",
+            "What are the next steps or follow-up items?",
+        ]
+
+    def _load_from_groundtruth_file(
+        self, groundtruth_file: str, experiment_type: str
+    ) -> List[Dict]:
+        """Load data from a groundtruth JSON file (individual or consolidated)."""
+        self.log.info(
+            f"Loading groundtruth file: {groundtruth_file} for experiment type: {experiment_type}"
+        )
+
+        with open(groundtruth_file, "r", encoding="utf-8") as f:
+            groundtruth_data = json.load(f)
+
+        analysis = groundtruth_data.get("analysis", {})
+        metadata = groundtruth_data.get("metadata", {})
+
+        # Check if this is a consolidated groundtruth file
+        is_consolidated = "consolidated_from" in metadata or "source_files" in metadata
+
+        self.log.info(f"Metadata keys: {list(metadata.keys())}")
+        self.log.info(f"Is consolidated: {is_consolidated}")
+
+        if is_consolidated:
+            return self._load_from_consolidated_groundtruth(
+                groundtruth_data, experiment_type
+            )
+        else:
+            return self._load_from_individual_groundtruth(
+                groundtruth_data, experiment_type
+            )
+
+    def _load_from_individual_groundtruth(
+        self, groundtruth_data: Dict, experiment_type: str
+    ) -> List[Dict]:
+        """Load data from an individual groundtruth file."""
+        analysis = groundtruth_data.get("analysis", {})
+        metadata = groundtruth_data.get("metadata", {})
+
+        if experiment_type == "qa":
+            # Extract QA pairs from groundtruth
+            qa_pairs = analysis.get("qa_pairs", [])
+
+            if not qa_pairs:
+                raise ValueError(
+                    "No QA pairs found in groundtruth file for QA experiment"
+                )
+
+            data = []
+            for qa_pair in qa_pairs:
+                data.append(
+                    {
+                        "type": "qa",
+                        "query": qa_pair.get("query", qa_pair.get("question", "")),
+                        "ground_truth": qa_pair.get(
+                            "response", qa_pair.get("answer", "")
+                        ),
+                    }
+                )
+
+            return data
+
+        elif experiment_type == "summarization":
+            # Extract transcript content and summaries from groundtruth
+            summaries = analysis.get("summaries", {})
+
+            if not summaries:
+                raise ValueError(
+                    "No summaries found in groundtruth file for summarization experiment"
+                )
+
+            # Get the source transcript content
+            source_file = metadata.get("source_file", "")
+
+            # Read transcript content
+            if not source_file or not Path(source_file).exists():
+                raise ValueError(f"Source transcript file not found: {source_file}")
+
+            with open(source_file, "r", encoding="utf-8") as f:
+                transcript_content = f.read().strip()
+
+            if not transcript_content:
+                raise ValueError(f"Empty transcript file: {source_file}")
+
+            data = [
+                {
+                    "type": "summarization",
+                    "transcript": transcript_content,
+                    "groundtruth_summaries": summaries,
+                    "source_file": source_file,
+                }
+            ]
+
+            return data
+
+        else:
+            raise ValueError(f"Unsupported experiment type: {experiment_type}")
+
+    def _load_from_consolidated_groundtruth(
+        self, groundtruth_data: Dict, experiment_type: str
+    ) -> List[Dict]:
+        """Load data from a consolidated groundtruth file."""
+        analysis = groundtruth_data.get("analysis", {})
+        metadata = groundtruth_data.get("metadata", {})
+
+        self.log.info(
+            f"Loading consolidated groundtruth for experiment type: {experiment_type}"
+        )
+        self.log.info(f"Metadata keys: {list(metadata.keys())}")
+        self.log.info(f"Analysis keys: {list(analysis.keys())}")
+
+        if experiment_type == "qa":
+            # For consolidated QA files, extract QA pairs from all items
+            data = []
+
+            # Get source file information from metadata for document loading
+            source_files_map = {}
+            if "source_files" in metadata:
+                for source_info in metadata["source_files"]:
+                    doc_id = source_info.get("transcript_id", "")
+                    source_file = source_info.get("source_file", "")
+                    if doc_id and source_file:
+                        source_files_map[doc_id] = source_file
+
+            # Cache for document content to avoid loading same document multiple times
+            document_content_cache = {}
+
+            # Check if analysis contains direct qa_pairs (can be dict or list)
+            if "qa_pairs" in analysis:
+                qa_pairs = analysis["qa_pairs"]
+
+                # Handle dict format (consolidated files keyed by document ID)
+                if isinstance(qa_pairs, dict):
+                    for doc_id, doc_qa_pairs in qa_pairs.items():
+                        # Try to load the source document content
+                        document_content = ""
+                        source_file = source_files_map.get(doc_id, "")
+
+                        # Check cache first
+                        if source_file in document_content_cache:
+                            document_content = document_content_cache[source_file]
+                        elif source_file:
+                            source_path = Path(source_file)
+                            if source_path.exists():
+                                try:
+                                    # Handle PDF files
+                                    if source_path.suffix.lower() == ".pdf":
+                                        self.log.info(
+                                            f"PDF file detected: {source_path}"
+                                        )
+                                        # Use local PDF extraction
+                                        document_content = self._extract_text_from_pdf(
+                                            str(source_path)
+                                        )
+                                    # Handle text files
+                                    else:
+                                        with open(
+                                            source_path, "r", encoding="utf-8"
+                                        ) as f:
+                                            document_content = f.read()
+
+                                    # Cache the content
+                                    document_content_cache[source_file] = (
+                                        document_content
+                                    )
+
+                                except Exception as e:
+                                    self.log.warning(
+                                        f"Failed to load document {source_path}: {e}"
+                                    )
+                                    document_content = ""
+                            else:
+                                self.log.warning(
+                                    f"Source document not found: {source_path}"
+                                )
+
+                        for qa_pair in doc_qa_pairs:
+                            data.append(
+                                {
+                                    "type": "qa",
+                                    "query": qa_pair.get(
+                                        "query", qa_pair.get("question", "")
+                                    ),
+                                    "ground_truth": qa_pair.get(
+                                        "response", qa_pair.get("answer", "")
+                                    ),
+                                    "source_item": doc_id,
+                                    "document_content": document_content,
+                                    "source_file": source_file,
+                                }
+                            )
+                # Handle list format (non-consolidated files)
+                elif isinstance(qa_pairs, list):
+                    for qa_pair in qa_pairs:
+                        data.append(
+                            {
+                                "type": "qa",
+                                "query": qa_pair.get(
+                                    "query", qa_pair.get("question", "")
+                                ),
+                                "ground_truth": qa_pair.get(
+                                    "response", qa_pair.get("answer", "")
+                                ),
+                            }
+                        )
+
+            # Also check for nested structure (qa_pairs within individual summaries)
+            summaries = analysis.get("summaries", {})
+            for item_id, item_data in summaries.items():
+                if "qa_pairs" in item_data:
+                    for qa_pair in item_data["qa_pairs"]:
+                        data.append(
+                            {
+                                "type": "qa",
+                                "query": qa_pair.get(
+                                    "query", qa_pair.get("question", "")
+                                ),
+                                "ground_truth": qa_pair.get(
+                                    "response", qa_pair.get("answer", "")
+                                ),
+                                "source_item": item_id,
+                            }
+                        )
+
+            if not data:
+                raise ValueError(
+                    "No QA pairs found in consolidated groundtruth file for QA experiment"
+                )
+
+            return data
+
+        elif experiment_type == "summarization":
+            # For consolidated summarization files, create separate items for each source
+            summaries = analysis.get("summaries", {})
+
+            self.log.info(f"Found {len(summaries)} summaries in consolidated file")
+
+            if not summaries:
+                raise ValueError(
+                    "No summaries found in consolidated groundtruth file for summarization experiment"
+                )
+
+            data = []
+            source_files_info = metadata.get("source_files", [])
+
+            self.log.info(f"Found {len(source_files_info)} source files in metadata")
+
+            # Create a mapping of transcript_id to source file info
+            source_file_map = {}
+            for source_info in source_files_info:
+                transcript_id = source_info.get("transcript_id", "")
+                source_file_map[transcript_id] = source_info
+                self.log.info(
+                    f"Mapped transcript_id '{transcript_id}' to source file '{source_info.get('source_file', '')}'"
+                )
+
+            self.log.info(
+                f"Created source file map with {len(source_file_map)} entries"
+            )
+
+            for item_id, item_summaries in summaries.items():
+                # Get source file information
+                source_info = source_file_map.get(item_id, {})
+                source_file = source_info.get("source_file", "")
+
+                self.log.info(
+                    f"Processing item {item_id}, source file: '{source_file}'"
+                )
+
+                if not source_file:
+                    self.log.warning(
+                        f"No source file found for item {item_id}, skipping"
+                    )
+                    continue
+
+                # Normalize path separators for current platform
+                source_file = source_file.replace("\\", "/")
+                source_path = Path(source_file)
+
+                self.log.info(
+                    f"Normalized source path: '{source_path}', exists: {source_path.exists()}"
+                )
+
+                # Read transcript content
+                if not source_path.exists():
+                    self.log.warning(
+                        f"Source transcript file not found: {source_path}, skipping {item_id}"
+                    )
+                    continue
+
+                with open(source_path, "r", encoding="utf-8") as f:
+                    transcript_content = f.read().strip()
+
+                if not transcript_content:
+                    self.log.warning(
+                        f"Empty transcript file: {source_path}, skipping {item_id}"
+                    )
+                    continue
+
+                data.append(
+                    {
+                        "type": "summarization",
+                        "transcript": transcript_content,
+                        "groundtruth_summaries": item_summaries,
+                        "source_file": str(source_path),
+                        "item_id": item_id,
+                    }
+                )
+
+            if not data:
+                raise ValueError(
+                    "No valid data items found in consolidated groundtruth file for summarization experiment"
+                )
+
+            return data
+
+        else:
+            raise ValueError(f"Unsupported experiment type: {experiment_type}")
+
+    def _load_from_transcript_file(
+        self, transcript_file: str, experiment_type: str, queries_source: str = None
+    ) -> List[Dict]:
+        """Load data from a single transcript file."""
+        with open(transcript_file, "r", encoding="utf-8") as f:
+            transcript_content = f.read().strip()
+
+        if not transcript_content:
+            raise ValueError(f"Empty transcript file: {transcript_file}")
+
+        if experiment_type == "qa":
+            # Get queries from groundtruth source
+            if not queries_source:
+                queries = self._get_default_queries()
+            else:
+                queries = self._load_queries_from_groundtruth(queries_source)
+                self.log.info(
+                    f"Loaded {len(queries)} queries from groundtruth source: {queries_source}"
+                )
+
+            # For QA experiments on raw transcripts, we can't provide ground truth
+            # The experiment will generate responses that can be manually evaluated
+            return [
+                {
+                    "type": "qa_raw",
+                    "transcript": transcript_content,
+                    "source_file": transcript_file,
+                    "queries": queries,
+                }
+            ]
+
+        elif experiment_type == "summarization":
+            return [
+                {
+                    "type": "summarization",
+                    "transcript": transcript_content,
+                    "source_file": transcript_file,
+                }
+            ]
+
+        else:
+            raise ValueError(f"Unsupported experiment type: {experiment_type}")
+
+    def _load_from_transcript_directory(
+        self, transcript_dir: str, experiment_type: str, queries_source: str = None
+    ) -> List[Dict]:
+        """Load data from a directory of transcript files."""
+        transcript_dir = Path(transcript_dir)
+
+        # Find all text files in directory (recursively)
+        transcript_files = list(transcript_dir.rglob("*.txt"))
+        if not transcript_files:
+            raise ValueError(f"No .txt files found in directory: {transcript_dir}")
+
+        data = []
+        for transcript_file in transcript_files:
+            file_data = self._load_from_transcript_file(
+                str(transcript_file), experiment_type, queries_source
+            )
+            data.extend(file_data)
+
+        return data
+
+    def run_experiment(
+        self,
+        experiment: ExperimentConfig,
+        data_items: List[Dict],
+        output_dir: str,
+        delay_seconds: float = 1.0,
+    ) -> str:
+        """Run a single experiment with the given data items."""
+        # Start timing the experiment
+        experiment_start_time = time.time()
+
+        self.log.info(
+            f"Running experiment: {experiment.name} (type: {experiment.experiment_type})"
+        )
+
+        # Create LLM client
+        client = self.create_llm_client(experiment)
+
+        # Set up output directories for incremental writing
+        output_base_path = Path(output_dir)
+        output_base_path.mkdir(parents=True, exist_ok=True)
+
+        # Generate safe filename from experiment name
+        safe_name = "".join(
+            c if (c.isalnum() or c in (" ", "-", "_")) else "_" if c == "." else ""
+            for c in experiment.name
+        ).rstrip()
+        safe_name = safe_name.replace(" ", "_")
+
+        # Create intermediate results directory
+        intermediate_dir = output_base_path / f"{safe_name}.intermediate"
+        intermediate_dir.mkdir(parents=True, exist_ok=True)
+
+        # Process each data item
+        results = []
+        total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
+        errors = []
+        item_timings = []  # Track timing for each item
+
+        for i, data_item in enumerate(data_items):
+            item_start_time = time.time()
+            data_type = data_item["type"]
+            self.log.info(
+                f"Processing item {i+1}/{len(data_items)} (type: {data_type})"
+            )
+
+            # Process based on experiment and data type
+            if data_type == "qa":
+                # Process Q&A pair with ground truth
+                # Check if document content is available
+                document_content = data_item.get("document_content", "")
+
+                if experiment.llm_type.lower() == "claude":
+                    if document_content:
+                        # Include document context with the question
+                        result = self.process_question_claude(
+                            client,
+                            data_item["query"],
+                            experiment.system_prompt,
+                            document_content,
+                        )
+                    else:
+                        result = self.process_question_claude(
+                            client, data_item["query"], experiment.system_prompt
+                        )
+                elif experiment.llm_type.lower() == "lemonade":
+                    if document_content:
+                        result = self.process_question_lemonade(
+                            client,
+                            data_item["query"],
+                            experiment.system_prompt,
+                            experiment.max_tokens,
+                            experiment.temperature,
+                            document_content,
+                        )
+                    else:
+                        result = self.process_question_lemonade(
+                            client,
+                            data_item["query"],
+                            experiment.system_prompt,
+                            experiment.max_tokens,
+                            experiment.temperature,
+                        )
+
+                # Create QA result entry
+                result_entry = {
+                    "query": data_item["query"],
+                    "ground_truth": data_item["ground_truth"],
+                    "response": result["response"],
+                }
+
+                # Add thinking tokens if present
+                if "thinking" in result and result["thinking"]:
+                    result_entry["thinking"] = result["thinking"]
+
+            elif data_type == "qa_raw":
+                # Process raw transcript with predefined questions
+                qa_results = []
+                total_result = {
+                    "response": "",
+                    "usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
+                    "cost": {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0},
+                    "error": None,
+                }
+
+                for query in data_item["queries"]:
+                    if experiment.llm_type.lower() == "claude":
+                        # Create context-aware prompt with transcript
+                        context_prompt = f"{experiment.system_prompt}\n\nTranscript:\n{data_item['transcript']}\n\nQuestion: {query}\n\nAnswer:"
+                        # For Claude, we can use the context as system prompt
+                        query_result = {
+                            "response": "",
+                            "usage": {
+                                "input_tokens": 0,
+                                "output_tokens": 0,
+                                "total_tokens": 0,
+                            },
+                            "cost": {
+                                "input_cost": 0.0,
+                                "output_cost": 0.0,
+                                "total_cost": 0.0,
+                            },
+                            "error": None,
+                        }
+                        try:
+                            response_data = client.get_completion_with_usage(
+                                context_prompt
+                            )
+                            response = response_data["content"]
+                            if isinstance(response, list):
+                                response_text = (
+                                    response[0].text
+                                    if hasattr(response[0], "text")
+                                    else str(response[0])
+                                )
+                            else:
+                                response_text = (
+                                    response.text
+                                    if hasattr(response, "text")
+                                    else str(response)
+                                )
+
+                            query_result = {
+                                "response": response_text.strip(),
+                                "usage": response_data["usage"],
+                                "cost": response_data["cost"],
+                                "error": None,
+                            }
+                        except Exception as e:
+                            self.log.error(f"Error processing QA with Claude: {e}")
+                            query_result["response"] = f"ERROR: {str(e)}"
+                            query_result["error"] = str(e)
+
+                    elif experiment.llm_type.lower() == "lemonade":
+                        # For Lemonade, use ChatML formatting
+                        query_result = {
+                            "response": "",
+                            "usage": {
+                                "input_tokens": 0,
+                                "output_tokens": 0,
+                                "total_tokens": 0,
+                            },
+                            "cost": {
+                                "input_cost": 0.0,
+                                "output_cost": 0.0,
+                                "total_cost": 0.0,
+                            },
+                            "error": None,
+                        }
+                        try:
+                            # Format with ChatML template for QA tasks, transcript as document context
+                            formatted_prompt = format_prompt_with_template(
+                                model=client.model,
+                                system_prompt=experiment.system_prompt,
+                                user_content=query,
+                                document_content=data_item["transcript"],
+                                use_chat_template=True,  # Use chat template for QA tasks
+                            )
+
+                            response_data = client.completions(
+                                model=client.model,  # Use model from experiment config
+                                prompt=formatted_prompt,
+                                max_tokens=experiment.max_tokens,
+                                temperature=experiment.temperature,
+                                stream=False,
+                            )
+
+                            # Extract text from the response
+                            response_text = ""
+                            if "choices" in response_data and response_data["choices"]:
+                                response_text = response_data["choices"][0].get(
+                                    "text", ""
+                                )
+
+                            # Extract thinking tokens if present
+                            extracted = extract_thinking_from_response(response_text)
+
+                            query_result["response"] = extracted["response"]
+                            # Store thinking for this query if present
+                            if extracted["thinking"]:
+                                query_result["thinking"] = extracted["thinking"]
+
+                        except Exception as e:
+                            self.log.error(f"Error processing QA with Lemonade: {e}")
+                            query_result["response"] = f"ERROR: {str(e)}"
+                            query_result["error"] = str(e)
+
+                    qa_result_item = {
+                        "query": query,
+                        "response": query_result["response"],
+                    }
+                    # Add thinking if present
+                    if "thinking" in query_result and query_result["thinking"]:
+                        qa_result_item["thinking"] = query_result["thinking"]
+                    qa_results.append(qa_result_item)
+
+                    # Accumulate usage/cost
+                    if query_result["usage"]:
+                        for key in total_result["usage"]:
+                            total_result["usage"][key] += query_result["usage"].get(
+                                key, 0
+                            )
+                    if query_result["cost"]:
+                        for key in total_result["cost"]:
+                            total_result["cost"][key] += query_result["cost"].get(
+                                key, 0.0
+                            )
+                    if query_result["error"]:
+                        if total_result["error"]:
+                            total_result["error"] += f"; {query_result['error']}"
+                        else:
+                            total_result["error"] = query_result["error"]
+
+                result = total_result
+                result_entry = {
+                    "transcript": (
+                        data_item["transcript"][:500] + "..."
+                        if len(data_item["transcript"]) > 500
+                        else data_item["transcript"]
+                    ),
+                    "source_file": data_item.get("source_file", ""),
+                    "qa_results": qa_results,
+                }
+
+            elif data_type == "summarization":
+                # Process summarization task using independent calls for each component
+                if experiment.llm_type.lower() == "claude":
+                    combined = experiment.parameters.get("combined_prompt", False)
+                    result = self.process_summarization_claude(
+                        client,
+                        data_item["transcript"],
+                        experiment.system_prompt,
+                        combined,
+                    )
+                elif experiment.llm_type.lower() == "lemonade":
+                    combined = experiment.parameters.get("combined_prompt", False)
+                    # Extract parameters to pass (excluding combined_prompt)
+                    extra_params = {
+                        k: v
+                        for k, v in experiment.parameters.items()
+                        if k != "combined_prompt"
+                    }
+                    result = self.process_summarization_lemonade(
+                        client,
+                        data_item["transcript"],
+                        experiment.system_prompt,
+                        experiment.max_tokens,
+                        experiment.temperature,
+                        combined,
+                        extra_params,
+                    )
+
+                # Use the structured response directly from independent calls
+                generated_summaries = result["response"]
+
+                # Create summarization result entry
+                result_entry = {
+                    "transcript": (
+                        data_item["transcript"][:500] + "..."
+                        if len(data_item["transcript"]) > 500
+                        else data_item["transcript"]
+                    ),
+                    "generated_summaries": generated_summaries,
+                    "source_file": data_item.get("source_file", ""),
+                }
+
+                # Add ground truth summaries if available (from groundtruth files)
+                if "groundtruth_summaries" in data_item:
+                    result_entry["groundtruth_summaries"] = data_item[
+                        "groundtruth_summaries"
+                    ]
+
+                # Add thinking tokens if present (for combined mode)
+                if "thinking" in result and result["thinking"]:
+                    result_entry["thinking"] = result["thinking"]
+
+            else:
+                self.log.error(f"Unsupported data type: {data_type}")
+                continue
+
+            # Accumulate usage and cost data
+            if result["usage"]:
+                for key in total_usage:
+                    total_usage[key] += result["usage"].get(key, 0)
+            if result["cost"]:
+                for key in total_cost:
+                    total_cost[key] += result["cost"].get(key, 0.0)
+
+            if result["error"]:
+                errors.append(f"Item {i+1}: {result['error']}")
+
+            # Add processing time to result entry
+            item_time = time.time() - item_start_time
+            result_entry["processing_time_seconds"] = round(item_time, 3)
+            item_timings.append(item_time)
+
+            results.append(result_entry)
+
+            # Write intermediate result immediately for crash recovery
+            try:
+                intermediate_file = (
+                    intermediate_dir / f"item_{i+1:04d}_{data_type}.json"
+                )
+                intermediate_data = {
+                    "item_index": i,
+                    "data_type": data_type,
+                    "data_item": data_item,
+                    "result": result_entry,
+                    "usage": result.get("usage", {}),
+                    "cost": result.get("cost", {}),
+                    "error": result.get("error"),
+                    "timestamp": datetime.now().isoformat(),
+                    "processing_time_seconds": round(item_time, 3),
+                }
+
+                with open(intermediate_file, "w", encoding="utf-8") as f:
+                    json.dump(intermediate_data, f, indent=2)
+
+                # Update progress file
+                progress_file = intermediate_dir / "progress.json"
+                progress_data = {
+                    "experiment_name": experiment.name,
+                    "total_items": len(data_items),
+                    "completed_items": i + 1,
+                    "progress_percent": round((i + 1) / len(data_items) * 100, 1),
+                    "total_usage": total_usage.copy(),
+                    "total_cost": total_cost.copy(),
+                    "errors_count": len(errors),
+                    "last_updated": datetime.now().isoformat(),
+                    "estimated_remaining_time": None,
+                }
+
+                # Calculate estimated remaining time
+                if i > 0:
+                    avg_time_per_item = sum(item_timings) / len(item_timings)
+                    remaining_items = len(data_items) - (i + 1)
+                    estimated_remaining = remaining_items * avg_time_per_item
+                    progress_data["estimated_remaining_time"] = round(
+                        estimated_remaining, 1
+                    )
+
+                with open(progress_file, "w", encoding="utf-8") as f:
+                    json.dump(progress_data, f, indent=2)
+
+                self.log.info(
+                    f"Progress: {i+1}/{len(data_items)} items completed ({progress_data['progress_percent']}%)"
+                )
+
+            except Exception as e:
+                self.log.warning(f"Failed to write intermediate result {i+1}: {e}")
+
+            # Add delay between requests to avoid rate limiting
+            if delay_seconds > 0 and i < len(data_items) - 1:
+                time.sleep(delay_seconds)
+
+        # Calculate total experiment time
+        total_experiment_time = time.time() - experiment_start_time
+
+        # Determine inference type (cloud vs local)
+        inference_type = "cloud" if experiment.llm_type.lower() == "claude" else "local"
+
+        # Create output data in format expected by eval tool
+        output_data = {
+            "metadata": {
+                "experiment_name": experiment.name,
+                "experiment_type": experiment.experiment_type,
+                "llm_type": experiment.llm_type,
+                "model": experiment.model,
+                "inference_type": inference_type,  # Add inference type
+                "system_prompt": experiment.system_prompt,
+                "max_tokens": experiment.max_tokens,
+                "temperature": experiment.temperature,
+                "parameters": experiment.parameters,
+                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                "similarity_threshold": 0.7,  # Default threshold for eval
+                "total_items": len(data_items),
+                "total_usage": total_usage,
+                "total_cost": total_cost,
+                "errors": errors,
+                "timing": {
+                    "total_experiment_time_seconds": round(total_experiment_time, 3),
+                    "per_item_times_seconds": [round(t, 3) for t in item_timings],
+                    "average_per_item_seconds": (
+                        round(np.mean(item_timings), 3) if item_timings else 0
+                    ),
+                    "max_per_item_seconds": (
+                        round(max(item_timings), 3) if item_timings else 0
+                    ),
+                    "min_per_item_seconds": (
+                        round(min(item_timings), 3) if item_timings else 0
+                    ),
+                },
+            },
+            "analysis": {},
+        }
+
+        # Set analysis data based on experiment type and data type
+        if experiment.experiment_type == "qa":
+            # Check if we have traditional QA results or raw transcript QA results
+            if results and "qa_results" in results[0]:
+                output_data["analysis"]["transcript_qa_results"] = results
+            else:
+                output_data["analysis"]["qa_results"] = results
+        elif experiment.experiment_type == "summarization":
+            output_data["analysis"]["summarization_results"] = results
+
+        # Determine output structure based on data items
+        # (output_base_path and safe_name already created earlier for incremental writing)
+
+        # Check if we have multiple items with individual source files (hierarchical structure needed)
+        has_individual_items = any(
+            "source_file" in result for result in results if isinstance(result, dict)
+        )
+        has_item_ids = any(
+            "item_id" in data_item
+            for data_item in data_items
+            if isinstance(data_item, dict)
+        )
+
+        if has_individual_items or has_item_ids:
+            # Create hierarchical structure - save individual files and consolidated
+            individual_files = self._save_individual_experiment_files(
+                output_data, data_items, results, output_base_path, safe_name
+            )
+
+            # Create consolidated file at root
+            consolidated_filename = f"{safe_name}.experiment.json"
+            consolidated_path = output_base_path / consolidated_filename
+
+            # Add consolidation metadata
+            output_data["metadata"]["consolidated_from"] = len(individual_files)
+            output_data["metadata"]["individual_files"] = individual_files
+
+            with open(consolidated_path, "w", encoding="utf-8") as f:
+                json.dump(output_data, f, indent=2)
+
+            self.log.info(
+                f"Consolidated experiment results saved to: {consolidated_path}"
+            )
+            self.log.info(f"Individual experiment files: {len(individual_files)}")
+
+            # Clean up intermediate files after successful completion
+            self._cleanup_intermediate_files(intermediate_dir)
+
+            return str(consolidated_path)
+        else:
+            # Single file output (traditional behavior)
+            result_filename = f"{safe_name}.experiment.json"
+            result_path = output_base_path / result_filename
+
+            with open(result_path, "w", encoding="utf-8") as f:
+                json.dump(output_data, f, indent=2)
+
+            self.log.info(f"Experiment results saved to: {result_path}")
+
+            # Clean up intermediate files after successful completion
+            self._cleanup_intermediate_files(intermediate_dir)
+
+            return str(result_path)
+
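# run_experiment writes <safe_name>.intermediate/progress.json after every item,
# so a long batch can be watched from another shell. A small monitoring sketch;
# the output directory and experiment name below are placeholders, not paths
# shipped with this package:
#
#     import json, pathlib
#     progress = json.loads(
#         pathlib.Path("output/my_experiment.intermediate/progress.json").read_text()
#     )
#     print(progress["completed_items"], "/", progress["total_items"],
#           f"({progress['progress_percent']}%)")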
+    def _save_individual_experiment_files(
+        self,
+        base_output_data: Dict,
+        data_items: List[Dict],
+        results: List[Dict],
+        output_base_path: Path,
+        safe_experiment_name: str,
+    ) -> List[str]:
+        """Save individual experiment files maintaining directory hierarchy."""
+        individual_files = []
+
+        for i, (data_item, result) in enumerate(zip(data_items, results)):
+            # Determine output path based on source file or item_id
+            if "item_id" in data_item:
+                # From consolidated groundtruth - use item_id to determine path
+                item_id = data_item["item_id"]
+                # Create directory structure like emails/file_name or meetings/file_name
+                if "/" in item_id:
+                    relative_dir = item_id.split("/")[0] if "/" in item_id else ""
+                    file_base = item_id.split("/")[-1] if "/" in item_id else item_id
+                else:
+                    # Guess directory from item_id pattern
+                    if "email" in item_id.lower():
+                        relative_dir = "emails"
+                    elif "meeting" in item_id.lower():
+                        relative_dir = "meetings"
+                    else:
+                        relative_dir = "misc"
+                    file_base = item_id
+            elif "source_file" in data_item:
+                # From individual files - extract relative path from source file
+                source_file = Path(data_item["source_file"])
+                if "test_data" in source_file.parts:
+                    # Extract relative path from test_data structure
+                    test_data_index = source_file.parts.index("test_data")
+                    relative_parts = source_file.parts[test_data_index + 1 :]
+                    if len(relative_parts) > 1:
+                        relative_dir = "/".join(relative_parts[:-1])
+                        file_base = source_file.stem
+                    else:
+                        relative_dir = ""
+                        file_base = source_file.stem
+                else:
+                    relative_dir = ""
+                    file_base = source_file.stem
+            else:
+                # Fallback - no hierarchical structure
+                relative_dir = ""
+                file_base = f"item_{i+1}"
+
+            # Create individual output data
+            individual_output_data = {
+                "metadata": base_output_data["metadata"].copy(),
+                "analysis": {},
+            }
+
+            # Adjust metadata for individual file
+            individual_output_data["metadata"]["total_items"] = 1
+            individual_output_data["metadata"]["source_item"] = data_item.get(
+                "item_id", ""
+            )
+            individual_output_data["metadata"]["source_file"] = data_item.get(
+                "source_file", ""
+            )
+
+            # Add single result to analysis
+            if base_output_data["metadata"]["experiment_type"] == "qa":
+                if "qa_results" in result:
+                    individual_output_data["analysis"]["transcript_qa_results"] = [
+                        result
+                    ]
+                else:
+                    individual_output_data["analysis"]["qa_results"] = [result]
+            elif base_output_data["metadata"]["experiment_type"] == "summarization":
+                individual_output_data["analysis"]["summarization_results"] = [result]
+
+            # Create output directory and file
+            if relative_dir:
+                output_dir = output_base_path / relative_dir
+                output_dir.mkdir(parents=True, exist_ok=True)
+            else:
+                output_dir = output_base_path
+
+            individual_filename = f"{file_base}.{safe_experiment_name}.experiment.json"
+            individual_path = output_dir / individual_filename
+
+            with open(individual_path, "w", encoding="utf-8") as f:
+                json.dump(individual_output_data, f, indent=2)
+
+            # Store relative path for consolidation metadata
+            if relative_dir:
+                relative_path = f"{relative_dir}/{individual_filename}"
+            else:
+                relative_path = individual_filename
+
+            individual_files.append(relative_path)
+
+        return individual_files
+
+    def _cleanup_intermediate_files(self, intermediate_dir: Path) -> None:
+        """Clean up intermediate files after successful completion."""
+        try:
+            import shutil
+
+            if intermediate_dir.exists():
+                shutil.rmtree(intermediate_dir)
+            self.log.info(f"Cleaned up intermediate files from: {intermediate_dir}")
+        except Exception as e:
+            self.log.warning(
+                f"Failed to clean up intermediate directory {intermediate_dir}: {e}"
+            )
+
+     def run_all_experiments(
+         self,
+         input_path: str,
+         output_dir: str,
+         delay_seconds: float = 1.0,
+         queries_source: str = None,
+         skip_existing: bool = False,
+     ) -> Tuple[List[str], int]:
+         """Run all experiments defined in the config file.
+
+         Returns:
+             tuple: (result_files, skipped_count) where result_files is a list of output file paths
+             and skipped_count is the number of experiments that were skipped
+         """
+         # Start timing all experiments
+         all_experiments_start_time = time.time()
+
+         self.log.info(
+             f"Starting batch experiments with {len(self.experiments)} configurations"
+         )
+
+         # Run each experiment
+         result_files = []
+         skipped_count = 0
+         for i, experiment in enumerate(self.experiments):
+             # Check if we should skip this experiment
+             if skip_existing and self.check_experiment_exists(experiment, output_dir):
+                 self.log.info(
+                     f"Skipping experiment {i+1}/{len(self.experiments)}: {experiment.name} (already exists)"
+                 )
+                 skipped_count += 1
+
+                 # Add the existing file to result_files for consolidated report
+                 output_base_path = Path(output_dir)
+                 safe_name = "".join(
+                     (
+                         c
+                         if (c.isalnum() or c in (" ", "-", "_"))
+                         else "_" if c == "." else ""
+                     )
+                     for c in experiment.name
+                 ).rstrip()
+                 safe_name = safe_name.replace(" ", "_")
+                 consolidated_filename = f"{safe_name}.experiment.json"
+                 consolidated_path = output_base_path / consolidated_filename
+                 result_files.append(str(consolidated_path))
+                 continue
+
+             self.log.info(
+                 f"Running experiment {i+1}/{len(self.experiments)}: {experiment.name} (type: {experiment.experiment_type})"
+             )
+
+             # Load data from input source based on experiment type
+             data_items = self.load_data_from_source(
+                 input_path, experiment.experiment_type, queries_source
+             )
+             self.log.info(
+                 f"Loaded {len(data_items)} data items from {input_path} for {experiment.experiment_type} experiment"
+             )
+
+             result_file = self.run_experiment(
+                 experiment, data_items, output_dir, delay_seconds
+             )
+             result_files.append(result_file)
+
+             # Add delay between experiments
+             if delay_seconds > 0 and i < len(self.experiments) - 1:
+                 self.log.info(f"Waiting {delay_seconds}s before next experiment...")
+                 time.sleep(delay_seconds)
+
+         # Calculate total time for all experiments
+         total_time = time.time() - all_experiments_start_time
+
+         if skipped_count > 0:
+             self.log.info(
+                 f"Completed {len(result_files) - skipped_count} new experiments, skipped {skipped_count} existing"
+             )
+         else:
+             self.log.info(
+                 f"Completed {len(result_files)} out of {len(self.experiments)} experiments"
+             )
+         self.log.info(f"Total execution time: {round(total_time, 2)} seconds")
+
+         # Create consolidated experiments report at root level
+         if len(result_files) > 1:
+             consolidated_report_path = self._create_consolidated_experiments_report(
+                 result_files, output_dir, input_path, total_time
+             )
+             self.log.info(
+                 f"Consolidated experiments report saved to: {consolidated_report_path}"
+             )
+
+         return result_files, skipped_count
+
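For orientation, a minimal sketch of driving run_all_experiments programmatically rather than through the CLI defined further below; the module path is taken from the CLI examples in the epilog, and the config and input paths are placeholders:

from gaia.eval.batch_experiment import BatchExperimentRunner

runner = BatchExperimentRunner("experiment_config.json")  # placeholder config path
result_files, skipped_count = runner.run_all_experiments(
    input_path="./transcripts",   # placeholder transcript directory
    output_dir="./experiments",
    delay_seconds=1.0,
    queries_source=None,
    skip_existing=True,           # mirrors the CLI default (no --force)
)
print(f"{len(result_files)} result files, {skipped_count} skipped")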
+     def _create_consolidated_experiments_report(
+         self,
+         result_files: List[str],
+         output_dir: str,
+         input_path: str,
+         total_time: float = None,
+     ) -> str:
+         """Create a consolidated report of all experiments."""
+         output_base_path = Path(output_dir)
+
+         # Load all experiment results
+         all_experiments = []
+         total_usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+         total_cost = {"input_cost": 0.0, "output_cost": 0.0, "total_cost": 0.0}
+
+         for result_file in result_files:
+             try:
+                 with open(result_file, "r", encoding="utf-8") as f:
+                     experiment_data = json.load(f)
+
+                 experiment_info = {
+                     "experiment_name": experiment_data["metadata"]["experiment_name"],
+                     "experiment_type": experiment_data["metadata"]["experiment_type"],
+                     "model": experiment_data["metadata"]["model"],
+                     "llm_type": experiment_data["metadata"]["llm_type"],
+                     "file_path": str(Path(result_file).relative_to(output_base_path)),
+                     "timestamp": experiment_data["metadata"]["timestamp"],
+                     "total_items": experiment_data["metadata"]["total_items"],
+                     "usage": experiment_data["metadata"]["total_usage"],
+                     "cost": experiment_data["metadata"]["total_cost"],
+                     "individual_files": experiment_data["metadata"].get(
+                         "individual_files", []
+                     ),
+                     "consolidated_from": experiment_data["metadata"].get(
+                         "consolidated_from", 0
+                     ),
+                 }
+
+                 # Include analysis results based on experiment type
+                 if "analysis" in experiment_data:
+                     analysis = experiment_data["analysis"]
+                     if experiment_data["metadata"]["experiment_type"] == "qa":
+                         # Include Q&A results
+                         if "qa_results" in analysis:
+                             experiment_info["qa_results"] = analysis["qa_results"]
+                     elif (
+                         experiment_data["metadata"]["experiment_type"]
+                         == "summarization"
+                     ):
+                         # Include summarization results
+                         if "summaries" in analysis:
+                             experiment_info["summaries"] = analysis["summaries"]
+
+                 all_experiments.append(experiment_info)
+
+                 # Accumulate totals
+                 for key in total_usage:
+                     total_usage[key] += experiment_data["metadata"]["total_usage"].get(
+                         key, 0
+                     )
+                 for key in total_cost:
+                     total_cost[key] += experiment_data["metadata"]["total_cost"].get(
+                         key, 0.0
+                     )
+
+             except Exception as e:
+                 self.log.error(f"Error loading experiment file {result_file}: {e}")
+                 continue
+
+         # Create consolidated report
+         consolidated_report = {
+             "metadata": {
+                 "report_type": "consolidated_experiments",
+                 "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                 "input_source": input_path,
+                 "output_directory": output_dir,
+                 "total_experiments": len(all_experiments),
+                 "total_usage": total_usage,
+                 "total_cost": total_cost,
+             },
+             "experiments": all_experiments,
+         }
+
+         # Add total execution time if provided
+         if total_time is not None:
+             consolidated_report["metadata"]["total_execution_time_seconds"] = round(
+                 total_time, 3
+             )
+
+         # Save consolidated report
+         consolidated_filename = "consolidated_experiments_report.json"
+         consolidated_path = output_base_path / consolidated_filename
+
+         with open(consolidated_path, "w", encoding="utf-8") as f:
+             json.dump(consolidated_report, f, indent=2)
+
+         return str(consolidated_path)
+
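The consolidated report written above is plain JSON, so its aggregate totals can be read back directly; a short sketch, with the output directory as a placeholder (the file name comes from the code above):

import json
from pathlib import Path

report_path = Path("./experiments") / "consolidated_experiments_report.json"
with open(report_path, "r", encoding="utf-8") as f:
    report = json.load(f)

meta = report["metadata"]
print("experiments:", meta["total_experiments"])
print("total tokens:", meta["total_usage"]["total_tokens"])
print("total cost:", meta["total_cost"]["total_cost"])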
+     def create_sample_config(self, output_path: str):
+         """Create a sample configuration file."""
+         sample_config = {
+             "description": "Batch experiment configuration for transcript evaluation (both Q&A and summarization)",
+             "experiments": [
+                 {
+                     "name": "Claude-Sonnet-QA-Standard",
+                     "llm_type": "claude",
+                     "model": DEFAULT_CLAUDE_MODEL,
+                     "experiment_type": "qa",
+                     "system_prompt": "You are a helpful assistant that answers questions about meeting transcripts. Provide accurate, concise answers based on the transcript content.",
+                     "max_tokens": 512,
+                     "temperature": 0.1,
+                     "parameters": {},
+                     "_comment": "Cloud inference - will incur API costs",
+                 },
+                 {
+                     "name": "Claude-Sonnet-Summarization-Standard",
+                     "llm_type": "claude",
+                     "model": DEFAULT_CLAUDE_MODEL,
+                     "experiment_type": "summarization",
+                     "system_prompt": "You are an expert meeting analyst. Analyze the transcript carefully and provide clear, accurate information based on the content.",
+                     "max_tokens": 512,
+                     "temperature": 0.1,
+                     "parameters": {},
+                     "_comment": "Cloud inference - will incur API costs",
+                 },
+                 {
+                     "name": "Claude-Sonnet-QA-Detailed",
+                     "llm_type": "claude",
+                     "model": DEFAULT_CLAUDE_MODEL,
+                     "experiment_type": "qa",
+                     "system_prompt": "You are an expert meeting analyst. Provide comprehensive, detailed answers about meeting transcripts including context, participants, and implications. Be thorough and precise.",
+                     "max_tokens": 1024,
+                     "temperature": 0.2,
+                     "parameters": {},
+                 },
+                 {
+                     "name": "Lemonade-Llama-QA-Standard",
+                     "llm_type": "lemonade",
+                     "model": "llama3.2:3b",
+                     "experiment_type": "qa",
+                     "system_prompt": "Answer questions about meeting transcripts clearly and accurately. Focus on the key information requested.",
+                     "max_tokens": 512,
+                     "temperature": 0.1,
+                     "parameters": {"host": "localhost", "port": 8000},
+                     "_comment": "Local inference - FREE, runs on your hardware",
+                 },
+                 {
+                     "name": "Lemonade-Llama-Summarization-Creative",
+                     "llm_type": "lemonade",
+                     "model": "llama3.2:3b",
+                     "experiment_type": "summarization",
+                     "system_prompt": "You are a creative meeting analyst. Analyze the transcript thoughtfully and provide insightful information that captures key insights and implications.",
+                     "max_tokens": 512,
+                     "temperature": 0.7,
+                     "parameters": {"host": "localhost", "port": 8000},
+                     "_comment": "Local inference - FREE, runs on your hardware",
+                 },
+             ],
+         }
+
+         with open(output_path, "w", encoding="utf-8") as f:
+             json.dump(sample_config, f, indent=2)
+
+         self.log.info(f"Sample configuration saved to: {output_path}")
+
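Because the sample file written above is ordinary JSON, it can be adjusted before a run either by hand or programmatically; a brief sketch, assuming the file was created at experiment_config.json (a placeholder path) and using only the keys defined above:

import json

with open("experiment_config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# e.g. raise the token budget for every Q&A experiment in the sample
for exp in config["experiments"]:
    if exp["experiment_type"] == "qa":
        exp["max_tokens"] = 1024

with open("experiment_config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)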
+     def create_config_from_groundtruth(
+         self, groundtruth_file: str, output_file: str
+     ) -> str:
+         """Create experiment configuration from groundtruth file metadata."""
+         try:
+             # Load groundtruth file
+             with open(groundtruth_file, "r", encoding="utf-8") as f:
+                 groundtruth_data = json.load(f)
+
+             metadata = groundtruth_data.get("metadata", {})
+             analysis = groundtruth_data.get("analysis", {})
+
+             # Extract key information
+             use_case = metadata.get("use_case", "qa")
+             original_model = metadata.get("claude_model", DEFAULT_CLAUDE_MODEL)
+             original_prompt = metadata.get("system_prompt", "")
+             max_tokens = metadata.get("max_tokens", 512 if use_case == "qa" else 1024)
+             temperature = metadata.get("temperature", 0.1)
+
+             # Determine appropriate system prompt if not in metadata
+             if not original_prompt:
+                 if use_case == "qa":
+                     original_prompt = "You are an expert meeting analyst. Answer questions about the transcript accurately and concisely based only on the provided information."
+                 elif use_case == "summarization":
+                     original_prompt = "You are an expert meeting analyst. Create a concise summary of the transcript including key topics, decisions, and action items."
+                 else:
+                     original_prompt = "You are an expert analyst. Process the provided content according to the task requirements."
+
+             # Create base experiment configuration
+             experiments = []
+
+             # Original configuration
+             base_name = original_model.replace("claude-", "").replace("-", "-").title()
+             experiments.append(
+                 {
+                     "name": f"{base_name}-Original",
+                     "llm_type": "claude",
+                     "model": original_model,
+                     "experiment_type": use_case,
+                     "system_prompt": original_prompt,
+                     "max_tokens": max_tokens,
+                     "temperature": temperature,
+                     "parameters": {},
+                 }
+             )
+
+             # Add model variations with same prompt
+             model_variants = [
+                 ("claude-3-haiku-20240307", "Haiku"),
+                 ("claude-3-opus-20240229", "Opus"),
+                 (DEFAULT_CLAUDE_MODEL, "Sonnet-4.5"),
+             ]
+
+             for model, name in model_variants:
+                 if model != original_model:  # Don't duplicate original
+                     experiments.append(
+                         {
+                             "name": f"Claude-{name}-Same-Prompt",
+                             "llm_type": "claude",
+                             "model": model,
+                             "experiment_type": use_case,
+                             "system_prompt": original_prompt,
+                             "max_tokens": max_tokens,
+                             "temperature": temperature,
+                             "parameters": {},
+                         }
+                     )
+
+             # Add temperature variations for original model
+             if temperature != 0.0:
+                 experiments.append(
+                     {
+                         "name": f"{base_name}-Creative",
+                         "llm_type": "claude",
+                         "model": original_model,
+                         "experiment_type": use_case,
+                         "system_prompt": original_prompt,
+                         "max_tokens": max_tokens,
+                         "temperature": min(
+                             CREATIVE_TEMPERATURE_MAX,
+                             temperature + CREATIVE_TEMPERATURE_INCREMENT,
+                         ),
+                         "parameters": {},
+                     }
+                 )
+
+             if temperature != 0.0:
+                 experiments.append(
+                     {
+                         "name": f"{base_name}-Deterministic",
+                         "llm_type": "claude",
+                         "model": original_model,
+                         "experiment_type": use_case,
+                         "system_prompt": original_prompt,
+                         "max_tokens": max_tokens,
+                         "temperature": 0.0,
+                         "parameters": {},
+                     }
+                 )
+
+             # Create configuration structure
+             groundtruth_name = Path(groundtruth_file).stem
+             config = {
+                 "description": f"Configuration generated from groundtruth metadata: {groundtruth_name}",
+                 "source_groundtruth": groundtruth_file,
+                 "generated_at": datetime.now().isoformat(),
+                 "original_metadata": metadata,
+                 "experiments": experiments,
+             }
+
+             # Save configuration
+             output_path = Path(output_file)
+             output_path.parent.mkdir(parents=True, exist_ok=True)
+
+             with open(output_path, "w", encoding="utf-8") as f:
+                 json.dump(config, f, indent=2, ensure_ascii=False)
+
+             self.log.info(
+                 f"Generated experiment configuration with {len(experiments)} experiments"
+             )
+             self.log.info(f"Configuration saved to: {output_path}")
+
+             return str(output_path)
+
+         except Exception as e:
+             self.log.error(f"Error creating config from groundtruth: {e}")
+             raise
+
+
+ def main():
+     """Command line interface for batch experiments."""
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description="Run batch experiments with different LLM configurations",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+   # Create sample configuration file
+   python -m gaia.eval.batch_experiment --create-sample-config experiment_config.json
+
+   # Run batch experiments on transcript directory
+   python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -o ./experiments
+
+   # Run batch experiments on transcript directory with custom queries from groundtruth
+   python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -q ./groundtruth/meeting.qa.groundtruth.json -o ./experiments
+
+   # Run batch experiments on groundtruth file
+   python -m gaia.eval.batch_experiment -c experiment_config.json -i ./groundtruth/transcript.qa.groundtruth.json -o ./experiments
+
+   # Run batch experiments on consolidated groundtruth file
+   python -m gaia.eval.batch_experiment -c experiment_config.json -i ./groundtruth/consolidated_summarization_groundtruth.json -o ./experiments
+
+   # Run with custom delay between requests
+   python -m gaia.eval.batch_experiment -c experiment_config.json -i ./transcripts -o ./experiments --delay 2.0
+         """,
+     )
+
+     parser.add_argument(
+         "-c", "--config", type=str, help="Path to experiment configuration JSON file"
+     )
+     parser.add_argument(
+         "-i",
+         "--input",
+         type=str,
+         help="Path to input data: transcript file, directory of transcripts, or groundtruth JSON file",
+     )
+     parser.add_argument(
+         "-q",
+         "--queries-source",
+         type=str,
+         help="Path to groundtruth JSON file to extract queries from (for QA experiments on raw transcripts)",
+     )
+     parser.add_argument(
+         "-o",
+         "--output-dir",
+         type=str,
+         default="./experiments",
+         help="Output directory for experiment results (default: ./experiments)",
+     )
+     parser.add_argument(
+         "--delay",
+         type=float,
+         default=1.0,
+         help="Delay in seconds between requests to avoid rate limiting (default: 1.0)",
+     )
+     parser.add_argument(
+         "--create-sample-config",
+         type=str,
+         help="Create a sample configuration file at the specified path",
+     )
+     parser.add_argument(
+         "--create-config-from-groundtruth",
+         type=str,
+         help="Create configuration from groundtruth file metadata (provide groundtruth file path)",
+     )
+     parser.add_argument(
+         "--force",
+         action="store_true",
+         help="Force regeneration of all experiments, even if they already exist (default: skip existing)",
+     )
+
+     args = parser.parse_args()
+
+     # Create sample config if requested
+     if args.create_sample_config:
+         runner = BatchExperimentRunner.__new__(BatchExperimentRunner)
+         runner.log = get_logger(__name__)
+         runner.create_sample_config(args.create_sample_config)
+         print(f"✅ Sample configuration created: {args.create_sample_config}")
+         print("Edit this file to define your experiments, then run:")
+         print(
+             f" python -m gaia.eval.batch_experiment -c {args.create_sample_config} -i <input_path> -o <output_dir>"
+         )
+         return
+
+     # Create config from groundtruth if requested
+     if args.create_config_from_groundtruth:
+         # Determine output filename if not provided in the argument
+         groundtruth_path = Path(args.create_config_from_groundtruth)
+         default_output = f"{groundtruth_path.stem}.config.json"
+
+         runner = BatchExperimentRunner.__new__(BatchExperimentRunner)
+         runner.log = get_logger(__name__)
+         config_path = runner.create_config_from_groundtruth(
+             args.create_config_from_groundtruth, default_output
+         )
+         print(f"✅ Configuration created from groundtruth metadata: {config_path}")
+         print("Review and edit the configuration, then run:")
+         print(
+             f" python -m gaia.eval.batch_experiment -c {config_path} -i <input_path> -o <output_dir>"
+         )
+         return
+
+     # Validate required arguments
+     if not args.config or not args.input:
+         parser.error(
+             "Both --config and --input are required (unless using --create-sample-config or --create-config-from-groundtruth)"
+         )
+
+     # Run batch experiments
+     runner = BatchExperimentRunner(args.config)
+     # By default skip existing experiments, unless --force is specified
+     skip_existing = not args.force
+     result_files, skipped_count = runner.run_all_experiments(
+         input_path=args.input,
+         output_dir=args.output_dir,
+         delay_seconds=args.delay,
+         queries_source=args.queries_source,
+         skip_existing=skip_existing,
+     )
+
+     # Report results with skip information
+     if skipped_count > 0:
+         new_count = len(result_files) - skipped_count
+         print(
+             f"✅ Completed {len(result_files)} experiments ({new_count} new, {skipped_count} skipped)"
+         )
+     else:
+         print(f"✅ Completed {len(result_files)} experiments")
+
+     print(f" Results saved to: {args.output_dir}")
+     print(f" Generated files:")
+     for result_file in result_files:
+         print(f" - {Path(result_file).name}")
+
+     print(f"\nNext steps:")
+     print(f" 1. Evaluate results using: gaia eval -f <result_file>")
+     print(f" 2. Generate comparative report: gaia report -d {args.output_dir}")
+
+
+ if __name__ == "__main__":
+     exit(main())