ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,218 @@
1
+ from typing import Union, List
2
+ from collections.abc import Callable
3
+ import re
4
+ from pathlib import PurePath
5
+ from ..stores.models import Document
6
+ from .basevideo import BaseVideoLoader
7
+
8
+
9
def split_text(text, max_length):
    """Split text into chunks of at most ``max_length`` characters without breaking words.

    The text is divided into paragraphs (separated by a blank line). Paragraphs
    are packed into chunks joined by a blank line. A paragraph longer than
    ``max_length`` is divided into sentences (split after ``.``, ``!`` or ``?``
    followed by spaces), which are packed joined by a single space.

    Args:
        text: The text to split.
        max_length: Maximum number of characters allowed in a chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunks. A single sentence that
        is itself longer than ``max_length`` is never broken; it is emitted as
        its own oversized chunk.
    """
    chunks = []
    current_chunk = ""

    def flush():
        """Store the accumulated chunk, skipping it when it is blank."""
        piece = current_chunk.strip()
        if piece:
            chunks.append(piece)

    def add_piece(piece, separator):
        """Add ``piece`` to the running chunk, starting a new chunk on overflow."""
        nonlocal current_chunk
        if not current_chunk:
            # Nothing accumulated yet: there is nothing to flush (so no empty
            # chunk is emitted) and no separator to charge against the budget.
            current_chunk = piece
        elif len(current_chunk) + len(separator) + len(piece) > max_length:
            flush()
            current_chunk = piece
        else:
            current_chunk += separator + piece

    for paragraph in text.split('\n\n'):
        if len(paragraph) > max_length:
            # The paragraph cannot fit in one chunk: pack it sentence by sentence.
            for sentence in re.split(r'(?<=[.!?]) +', paragraph):
                add_piece(sentence, " ")
        else:
            add_piece(paragraph, "\n\n")

    # Keep whatever is left in the accumulator.
    flush()
    return chunks
41
+
42
+
43
class VideoLocalLoader(BaseVideoLoader):
    """
    Generating Video transcripts from local Videos.

    For each video file the loader extracts the audio track, transcribes it,
    and returns ``Document`` objects for the transcript, its summary, the
    VTT subtitles, the individual dialog blocks and (when diarization is
    enabled) the SRT subtitles.
    """
    # File extensions handled by this loader.
    extensions: List[str] = ['.mp4', '.webm']

    def __init__(
        self,
        *args,
        source: List[Union[str, PurePath]] = None,
        tokenizer: Union[str, Callable] = None,
        text_splitter: Union[str, Callable] = None,
        source_type: str = 'video',
        **kwargs
    ):
        """Configure the loader.

        Args:
            *args: Accepted for signature compatibility; not forwarded to the
                base initializer.
            source: Video file(s) to load.
            tokenizer: Tokenizer forwarded to the base loader.
            text_splitter: Text splitter forwarded to the base loader.
            source_type: Source type label forwarded to the base loader.
            **kwargs: Forwarded to the base loader. The keys
                ``extract_frames``, ``seconds_per_frame``, ``compress_speed``
                and ``speed_factor`` are also read here.
        """
        super().__init__(
            source=source,
            tokenizer=tokenizer,
            text_splitter=text_splitter,
            source_type=source_type,
            **kwargs
        )
        # NOTE: kwargs was already forwarded to the base initializer above, so
        # these options are visible to it as well.
        self.extract_frames: bool = kwargs.pop('extract_frames', False)
        self.seconds_per_frame: int = kwargs.pop('seconds_per_frame', 1)
        self.compress_speed: bool = kwargs.pop('compress_speed', False)
        self.speed_factor: float = kwargs.pop('speed_factor', 1.5)

    async def _load(self, path: Union[str, PurePath, List[PurePath]], **kwargs) -> List[Document]:
        """Transcribe one local video and build its documents.

        Side files are derived from ``path`` by swapping the suffix
        (``.wav``, ``.txt``, ``.vtt``, ``.srt``, ``.summary``).

        Args:
            path: Video file to process. Although the annotation admits other
                types, the body uses ``path.name``, ``path.stem`` and
                ``path.with_suffix``, so a path object is required.
            **kwargs: Unused here.

        Returns:
            The list of documents generated for the video.

        Raises:
            Exception: Any error raised while extracting or transcribing the
                audio is printed and re-raised.
        """
        metadata = {
            "url": f"{path}",
            "source": f"{path}",
            "filename": f"{path.name}",
            "question": '',
            "answer": '',
            'type': 'video_transcript',
            "source_type": self._source_type,
            "data": {},
            "summary": '',
            "document_meta": {
                "language": self._language,
                "topic_tags": ""
            }
        }
        documents = []
        transcript_path = path.with_suffix('.txt')
        vtt_path = path.with_suffix('.vtt')
        srt_path = path.with_suffix(".srt")
        summary_path = path.with_suffix('.summary')
        audio_path = path.with_suffix('.wav')
        # second: extract audio from File
        try:
            self.extract_audio(
                path,
                audio_path,
                compress_speed=self.compress_speed,
                speed_factor=self.speed_factor
            )
        except Exception as exc:
            print(f"Error extracting audio from video: {exc}")
            raise
        transcript = ''
        try:
            # ensure a clean 16k Hz mono wav file for whisper
            wav_path = self.ensure_wav_16k_mono(audio_path)
            # get the Whisper parser
            transcript_whisper = self.get_whisperx_transcript(wav_path)
            transcript = transcript_whisper.get('text', '') if transcript_whisper else ''
        except Exception as exc:
            print(f"Error transcribing audio from video: {exc}")
            raise
        # diarization:
        if self._diarization:
            if (srt := self.audio_to_srt(
                audio_path=wav_path,
                asr=transcript_whisper,
                output_srt_path=srt_path,
                max_gap_s=0.5,
                max_chars=90,
                max_duration_s=0.9,
            )):
                doc = Document(
                    page_content=srt,
                    metadata={
                        "source": f"{srt_path}",
                        "url": f"{srt_path.name}",
                        "filename": f"{srt_path}",
                        "origin": f"{path}",
                        'type': 'srt_transcript',
                        "source_type": 'AUDIO',
                    }
                )
                # Keep the SRT document; previously it was built and discarded.
                documents.append(doc)
        # Summarize the transcript
        if transcript:
            # first: extract summary, saving summary as a document:
            summary = await self.summary_from_text(transcript)
            # The check further down implies the summary may be falsy, so do
            # not call .encode() on a missing value.
            self.saving_file(summary_path, (summary or '').encode('utf-8'))
            # second: saving transcript to a file:
            self.saving_file(transcript_path, transcript.encode('utf-8'))
            # Create Three Documents:
            # one is for transcript
            # split document only if size > 65.534
            if len(transcript) > 65534:
                # Split transcript into chunks
                transcript_chunks = split_text(transcript, 32767)
                for chunk in transcript_chunks:
                    doc = Document(
                        page_content=chunk,
                        metadata=metadata
                    )
                    documents.append(doc)
            else:
                doc = Document(
                    page_content=transcript,
                    metadata=metadata
                )
                documents.append(doc)
            # second is Summary
            if summary:
                _meta = {
                    **metadata,
                    "type": 'video summary'
                }
                doc = Document(
                    page_content=summary,
                    metadata=_meta
                )
                # Keep the summary document; previously it was built and discarded.
                documents.append(doc)
            # Third is VTT:
            if transcript_whisper:
                # VTT version:
                vtt_text = self.transcript_to_vtt(transcript_whisper, vtt_path)
                _meta = {
                    **metadata,
                    "type": 'video subte vtt'
                }
                if len(vtt_text) > 65535:
                    transcript_chunks = split_text(vtt_text, 65535)
                    for chunk in transcript_chunks:
                        doc = Document(
                            page_content=chunk,
                            metadata=_meta
                        )
                        documents.append(doc)
                else:
                    doc = Document(
                        page_content=vtt_text,
                        metadata=_meta
                    )
                    documents.append(doc)
                # Saving every dialog chunk as a separate document
                dialogs = self.transcript_to_blocks(transcript_whisper)
                docs = []
                for chunk in dialogs:
                    start_time = chunk['start_time']
                    _meta = {
                        "source": f"{path.name}: min. {start_time}",
                        "type": "video dialog",
                        "document_meta": {
                            "start": f"{start_time}",
                            "end": f"{chunk['end_time']}",
                            "id": f"{chunk['id']}",
                            "language": self._language,
                            "title": f"{path.stem}",
                            "topic_tags": ""
                        }
                    }
                    # Dialog-specific keys override the shared video metadata.
                    _info = {**metadata, **_meta}
                    doc = Document(
                        page_content=chunk['text'],
                        metadata=_info
                    )
                    docs.append(doc)
                documents.extend(docs)
        return documents

    async def load_video(self, url: str, video_title: str, transcript: str) -> list:
        """Placeholder with no implementation; currently returns ``None``."""
        pass
@@ -0,0 +1,377 @@
1
+ from typing import Union, List, Optional
2
+ from collections.abc import Callable
3
+ import re
4
+ import json
5
+ from pathlib import PurePath, Path
6
+ from datetime import datetime
7
+ from ..stores.models import Document
8
+ from .basevideo import BaseVideoLoader
9
+ from ..clients.google import GoogleGenAIClient
10
+ from ..models.google import GoogleModel
11
+
12
+
13
def split_text(text, max_length):
    """Split text into chunks of a maximum length, ensuring not to break words.

    The text is divided on blank lines (``"\\n\\n"``) into paragraphs, which are
    packed greedily into chunks. A paragraph longer than ``max_length`` is
    divided into sentences (after ``.``, ``!`` or ``?`` followed by spaces),
    and a sentence that is still too long is divided on single spaces.

    Args:
        text: The text to split. ``None`` or an empty string yields ``[]``.
        max_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunks. A chunk only exceeds
        ``max_length`` when a single word is itself longer than the limit,
        because words are never broken.
    """
    if not text:
        return []

    chunks = []
    current_chunk = ""

    def flush():
        # Emit the buffered chunk, skipping blank ones so callers never
        # receive empty documents.
        nonlocal current_chunk
        stripped = current_chunk.strip()
        if stripped:
            chunks.append(stripped)
        current_chunk = ""

    def add(piece, separator):
        # Append `piece` to the buffer, starting a new chunk when the
        # separator plus the piece would overflow the limit.
        nonlocal current_chunk
        if not current_chunk:
            current_chunk = piece
        elif len(current_chunk) + len(separator) + len(piece) > max_length:
            flush()
            current_chunk = piece
        else:
            current_chunk += separator + piece

    for paragraph in text.split('\n\n'):
        if len(paragraph) <= max_length:
            add(paragraph, "\n\n")
            continue
        # Paragraph is too large: fall back to sentence granularity.
        for sentence in re.split(r'(?<=[.!?]) +', paragraph):
            if len(sentence) <= max_length:
                add(sentence, " ")
                continue
            # Sentence is still too large: fall back to word granularity.
            for word in sentence.split(' '):
                add(word, " ")

    # Add any remaining text to chunks
    flush()
    return chunks
45
+
46
+
47
def extract_scenes_from_response(response_text: str) -> List[dict]:
    """
    Extract structured scenes from the AI response.

    Parsing is attempted in three stages:

    1. The span from the first ``{`` to the last ``}`` is parsed as JSON. If it
       contains a ``scenes`` key whose value is a list of dicts, that list is
       returned unchanged.
    2. Otherwise the text is scanned for ``Scene N`` / ``Step N`` markers
       (case-insensitive). Each marker yields a dict with the keys
       ``scene_number``, ``instructions`` (text outside double quotes),
       ``spoken_text`` (text inside double quotes), ``content`` and
       ``timestamp``.
    3. If no markers are found, a single scene holding the whole response is
       returned.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A list of scene dictionaries.
    """
    # Try to extract JSON from the response
    try:
        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if json_match:
            json_data = json.loads(json_match.group())
            candidate = json_data.get('scenes') if isinstance(json_data, dict) else None
            # Only trust the JSON payload when it has the advertised shape;
            # anything else (null, string, list of strings) would break
            # callers that iterate the result and call ``.get`` on each item.
            if isinstance(candidate, list) and all(
                isinstance(item, dict) for item in candidate
            ):
                return candidate
    except json.JSONDecodeError:
        # Not valid JSON: fall through to the text-based parser.
        pass

    scenes = []

    # Fallback: Parse text-based scenes
    # Look for scene markers like "Scene 1:", "Step 1:", etc.
    scene_pattern = r'(?:Scene|Step)\s*(\d+)[:.]?\s*(.*?)(?=(?:Scene|Step)\s*\d+|$)'
    matches = re.findall(scene_pattern, response_text, re.DOTALL | re.IGNORECASE)

    for i, (scene_num, content) in enumerate(matches):
        # Extract quoted text (spoken text)
        quotes = re.findall(r'"([^"]*)"', content)

        # Extract instructions (non-quoted text) and collapse whitespace runs
        instructions = re.sub(r'"[^"]*"', '', content).strip()
        instructions = re.sub(r'\s+', ' ', instructions)

        scenes.append({
            'scene_number': int(scene_num) if scene_num.isdigit() else i + 1,
            'instructions': instructions,
            'spoken_text': ' '.join(quotes) if quotes else '',
            'content': content.strip(),
            'timestamp': f"Scene {scene_num}" if scene_num else f"Scene {i + 1}"
        })

    # If no scenes found, create one scene with all content
    if not scenes:
        scenes.append({
            'scene_number': 1,
            'instructions': response_text,
            'spoken_text': '',
            'content': response_text,
            'timestamp': 'Full Video'
        })

    return scenes
98
+
99
+
100
class VideoUnderstandingLoader(BaseVideoLoader):
    """
    Video analysis loader using Google GenAI for understanding video content.
    Extracts step-by-step instructions and spoken text from training videos.

    For every video, ``_load`` asks the model for an analysis, saves the raw
    response next to the video file, and returns:

    * the full analysis (chunked when longer than 65534 characters),
    * one document per extracted scene,
    * an instructions-only summary and a spoken-text-only summary.

    When analysis fails, a single ``video_analysis_error`` document is
    returned instead.
    """
    extensions: List[str] = ['.mp4', '.webm', '.avi', '.mov', '.mkv']

    def __init__(
        self,
        source: Optional[Union[str, Path, List[Union[str, Path]]]] = None,
        *,
        tokenizer: Union[str, Callable] = None,
        text_splitter: Union[str, Callable] = None,
        source_type: str = 'video_understanding',
        model: Union[str, GoogleModel] = GoogleModel.GEMINI_2_5_FLASH_IMAGE_PREVIEW,
        temperature: float = 0.2,
        prompt: Optional[str] = None,
        custom_instructions: Optional[str] = None,
        **kwargs
    ):
        """Configure the loader.

        Args:
            source: Video path(s), forwarded to ``BaseVideoLoader``.
            tokenizer: Forwarded to ``BaseVideoLoader``.
            text_splitter: Forwarded to ``BaseVideoLoader``.
            source_type: Forwarded to ``BaseVideoLoader``.
            model: Model handed to ``GoogleGenAIClient``.
            temperature: Sampling temperature passed to the analysis call.
            prompt: Overrides ``default_prompt`` when set.
            custom_instructions: Overrides ``default_instructions`` when set.
            **kwargs: Forwarded to ``BaseVideoLoader``.
        """
        super().__init__(
            source,
            tokenizer=tokenizer,
            text_splitter=text_splitter,
            source_type=source_type,
            **kwargs
        )

        # Google GenAI configuration
        self.model = model
        self.temperature = temperature
        # Created lazily by _get_google_client().
        self.google_client = None

        # Custom prompts
        self.prompt = prompt
        self.custom_instructions = custom_instructions

        # Default prompt for video analysis
        self.default_prompt = """
Analyze the video and extract step-by-step instructions for employees to follow, and the spoken text into quotation marks, related to the training content shown in this video.
"""

        # Default instruction for video analysis
        self.default_instructions = """
Video Analysis Instructions:
1. Videos are training materials for employees to learn how to use Workday.
2. There are several step-by-step processes shown in the video, with screenshots and spoken text.
3. Break down the video into distinct scenes based on changes in visuals or context.
4. For each scene, extract all step-by-step instructions, including any spoken text in quotation marks.
5. Place each caption into an object with the timecode of the caption in the video.
"""

    async def _get_google_client(self) -> GoogleGenAIClient:
        """Get or create Google GenAI client."""
        if self.google_client is None:
            self.google_client = GoogleGenAIClient(model=self.model)
        return self.google_client

    async def _analyze_video_with_ai(self, video_path: Path) -> str:
        """Analyze video using Google GenAI.

        Args:
            video_path: Path of the video file to analyze.

        Returns:
            The ``output`` attribute of the response when present, otherwise
            ``str(response)``.

        Raises:
            Exception: Any error raised by the client is logged and re-raised.
                Returning the error text instead would let the caller store
                and index it as if it were a genuine analysis.
        """
        try:
            client = await self._get_google_client()

            # Use custom prompt or default
            prompt = self.prompt or self.default_prompt
            instructions = self.custom_instructions or self.default_instructions

            async with client as ai_client:
                self.logger.info(f"Analyzing video with Google GenAI: {video_path.name}")

                response = await ai_client.video_understanding(
                    video=video_path,
                    prompt=prompt,
                    prompt_instruction=instructions,
                    temperature=self.temperature,
                    stateless=True
                )

                return response.output if hasattr(response, 'output') else str(response)

        except Exception as e:
            self.logger.error(f"Error analyzing video with AI: {e}")
            raise

    @staticmethod
    def _scene_text(scene: dict, key: str) -> str:
        """Return ``scene[key]`` as text.

        Scenes parsed from model-produced JSON are not guaranteed to hold
        strings: ``None`` becomes ``''``, lists/tuples are joined with
        newlines, and any other value is passed through ``str()``.
        """
        value = scene.get(key)
        if value is None:
            return ''
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return "\n".join(str(item) for item in value)
        return str(value)

    async def _load(self, path: Union[str, PurePath, List[PurePath]], **kwargs) -> List[Document]:
        """Load and analyze video file.

        Args:
            path: A single video path, or a list of paths which are loaded
                one after another.
            **kwargs: Passed through when loading each item of a list.

        Returns:
            The generated documents. The list is empty when the file does not
            exist, and holds a ``video_analysis_error`` document when the
            analysis raised.
        """
        # The signature advertises a list of paths: load each one in turn.
        if isinstance(path, (list, tuple)):
            collected = []
            for item in path:
                collected.extend(await self._load(item, **kwargs))
            return collected

        if isinstance(path, (str, PurePath)):
            path = Path(path)
        if not path.exists():
            self.logger.error(f"Video file not found: {path}")
            return []

        self.logger.info(f"Processing video: {path.name}")

        # Base metadata
        base_metadata = {
            "url": f"file://{path}",
            "source": str(path),
            "filename": path.name,
            "type": "video_understanding",
            "source_type": self._source_type,
            "category": self.category,
            "created_at": datetime.now().strftime("%Y-%m-%d, %H:%M:%S"),
            "document_meta": {
                "language": self._language,
                "model_used": str(self.model.value if hasattr(self.model, 'value') else self.model),
                "analysis_type": "video_understanding",
                "video_title": path.stem
            }
        }

        documents = []

        try:
            # Analyze video with Google GenAI
            ai_response = await self._analyze_video_with_ai(path)

            # Save AI response to file
            response_path = path.with_suffix('.ai_analysis.txt')
            self.saving_file(response_path, ai_response.encode('utf-8'))

            # Extract scenes from AI response; ignore anything that is not a
            # dict so malformed model output cannot break the loops below.
            scenes = [
                scene
                for scene in (extract_scenes_from_response(ai_response) or [])
                if isinstance(scene, dict)
            ]

            # Create main analysis document
            main_doc_metadata = {
                **base_metadata,
                "type": "video_analysis_full",
                "document_meta": {
                    **base_metadata["document_meta"],
                    "total_scenes": len(scenes),
                    "analysis_timestamp": datetime.now().isoformat()
                }
            }

            # Split if too long
            if len(ai_response) > 65534:
                chunks = split_text(ai_response, 32767)
                for i, chunk in enumerate(chunks):
                    chunk_metadata = {
                        **main_doc_metadata,
                        "type": "video_analysis_chunk",
                        "document_meta": {
                            **main_doc_metadata["document_meta"],
                            "chunk_number": i + 1,
                            "total_chunks": len(chunks)
                        }
                    }
                    documents.append(
                        Document(
                            page_content=chunk,
                            metadata=chunk_metadata
                        )
                    )
            else:
                documents.append(
                    Document(
                        page_content=ai_response,
                        metadata=main_doc_metadata
                    )
                )

            # Create individual scene documents, collecting the per-scene
            # instructions and spoken text for the summary documents.
            all_instructions = []
            all_spoken = []

            for scene in scenes:
                instructions = self._scene_text(scene, 'instructions')
                spoken_text = self._scene_text(scene, 'spoken_text')

                scene_metadata = {
                    **base_metadata,
                    "type": "video_scene",
                    "source": f"{path.name}: {scene.get('timestamp', 'Scene')}",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "scene_number": scene.get('scene_number', 1),
                        "timestamp": scene.get('timestamp', ''),
                        "has_spoken_text": bool(spoken_text.strip()),
                        "has_instructions": bool(instructions.strip())
                    }
                }

                # Create content combining instructions and spoken text
                content_parts = []
                if instructions:
                    content_parts.append(f"INSTRUCTIONS:\n{instructions}")
                    all_instructions.append(
                        f"Scene {scene.get('scene_number', '')}: {instructions}"
                    )
                if spoken_text:
                    content_parts.append(f"SPOKEN TEXT:\n\"{spoken_text}\"")
                    all_spoken.append(
                        f"Scene {scene.get('scene_number', '')}: \"{spoken_text}\""
                    )

                if content_parts:
                    scene_content = "\n\n".join(content_parts)
                else:
                    scene_content = self._scene_text(scene, 'content')

                if scene_content.strip():
                    documents.append(
                        Document(
                            page_content=scene_content,
                            metadata=scene_metadata
                        )
                    )

            # Instructions summary document
            if all_instructions:
                instructions_metadata = {
                    **base_metadata,
                    "type": "video_instructions_summary",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "content_type": "instructions_only",
                        "scene_count": len(all_instructions)
                    }
                }

                instructions_content = "STEP-BY-STEP INSTRUCTIONS:\n\n" + "\n\n".join(all_instructions)

                documents.append(
                    Document(
                        page_content=instructions_content,
                        metadata=instructions_metadata
                    )
                )

            # Spoken text summary document
            if all_spoken:
                spoken_metadata = {
                    **base_metadata,
                    "type": "video_spoken_summary",
                    "document_meta": {
                        **base_metadata["document_meta"],
                        "content_type": "spoken_text_only",
                        "scene_count": len(all_spoken)
                    }
                }

                spoken_content = "SPOKEN TEXT TRANSCRIPT:\n\n" + "\n\n".join(all_spoken)

                documents.append(
                    Document(
                        page_content=spoken_content,
                        metadata=spoken_metadata
                    )
                )

            self.logger.info(f"Generated {len(documents)} documents from video analysis")

        except Exception as e:
            self.logger.error(f"Error processing video {path}: {e}")
            # Create error document
            error_metadata = {
                **base_metadata,
                "type": "video_analysis_error",
                "document_meta": {
                    **base_metadata["document_meta"],
                    "error": str(e),
                    "error_timestamp": datetime.now().isoformat()
                }
            }

            documents.append(
                Document(
                    page_content=f"Error analyzing video {path.name}: {str(e)}",
                    metadata=error_metadata
                )
            )

        return documents

    async def load_video(self, url: str, video_title: str, transcript: str) -> list:
        """
        Required abstract method implementation.
        This method is not used in our implementation but required by BaseVideoLoader.
        """
        # This method is required by the abstract base class but not used in our implementation
        # We use _load instead for our video analysis
        return []

    async def close(self):
        """Clean up resources by calling the base class ``clear_cuda()``."""
        super().clear_cuda()