ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
parrot/tools/msword.py ADDED
@@ -0,0 +1,635 @@
1
+ """
2
+ MS Word Tool migrated to use AbstractDocumentTool framework.
3
+ """
4
+ from typing import Any, Dict, List, Optional, Union
5
+ import re
6
+ import tempfile
7
+ import os
8
+ from pathlib import Path
9
+ import io
10
+ from urllib.parse import urlparse
11
+ import aiohttp
12
+ import aiofiles
13
+ from docx import Document
14
+ from docx.shared import Inches, Pt
15
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
16
+ from docx.enum.style import WD_STYLE_TYPE
17
+ from jinja2 import Environment, FileSystemLoader
18
+ from pydantic import BaseModel, Field, field_validator
19
+ import mammoth
20
+ import markdown
21
+ from bs4 import BeautifulSoup, NavigableString
22
+ from markdownify import markdownify as md
23
+ from .document import AbstractDocumentTool, DocumentGenerationArgs
24
+
25
+
26
class MSWordArgs(DocumentGenerationArgs):
    """Input schema for MS Word document generation.

    Adds Word-specific options on top of the fields inherited from
    ``DocumentGenerationArgs``. Every option declared here is optional and
    defaults to ``None``.
    """

    # HTML template rendered before the content is converted.
    template_name: Optional[str] = Field(
        default=None,
        description="Name of the HTML template (e.g., 'report.html') to render before conversion",
    )
    # Values handed to the HTML template when it is rendered.
    template_vars: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Variables to pass to the HTML template (e.g., title, author, date)",
    )
    # Existing DOCX file used as the starting document.
    docx_template: Optional[str] = Field(
        default=None,
        description="Path to a DOCX template file to use as base document",
    )
    # Free-form styling options.
    style_config: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Custom styling configuration for the document",
    )
    # Margins keyed by side name, expressed in inches.
    page_margins: Optional[Dict[str, float]] = Field(
        default=None,
        description="Page margins in inches (top, bottom, left, right)",
    )

    @field_validator('template_name')
    @classmethod
    def validate_template_name(cls, v):
        """Append ``.html`` to a non-empty template name that lacks the suffix."""
        # Empty/None values and already-suffixed names pass through untouched.
        if not v or v.endswith('.html'):
            return v
        return f"{v}.html"
56
+
57
+
58
+ class MSWordTool(AbstractDocumentTool):
59
+ """
60
+ Microsoft Word Document Generation Tool.
61
+
62
+ This tool converts text content (including Markdown and HTML) into professionally
63
+ formatted Word documents (.docx). It supports custom templates, styling, and
64
+ advanced document formatting features.
65
+
66
+ Features:
67
+ - Markdown to Word conversion with proper formatting
68
+ - HTML to Word conversion support
69
+ - Custom DOCX template support
70
+ - Jinja2 HTML template processing
71
+ - Configurable styling and page setup
72
+ - Table, list, and heading support
73
+ - Professional document formatting
74
+ """
75
+
76
+ name = "msword_generator"
77
+ description = (
78
+ "Generate Microsoft Word documents from text, Markdown, or HTML content. "
79
+ "Supports custom templates, styling, and professional document formatting. "
80
+ "Perfect for creating reports, documentation, and formatted documents."
81
+ )
82
+ args_schema = MSWordArgs
83
+
84
+ # Document type configuration
85
+ document_type = "document"
86
+ default_extension = "docx"
87
+ supported_extensions = [".docx", ".dotx"]
88
+
89
def __init__(
    self,
    templates_dir: Optional[Path] = None,
    default_html_template: Optional[str] = None,
    **kwargs
):
    """
    Set up the tool and, when a templates directory is set, a Jinja2 environment.

    Args:
        templates_dir: Directory containing HTML and DOCX templates
        default_html_template: Default HTML template for content processing
        **kwargs: Additional arguments forwarded to AbstractDocumentTool
    """
    super().__init__(templates_dir=templates_dir, **kwargs)
    self.default_html_template = default_html_template

    # self.templates_dir is read back after the parent initializer has run;
    # without it there is nothing to load templates from, so html_env is None.
    self.html_env = (
        Environment(
            loader=FileSystemLoader(str(self.templates_dir)),
            autoescape=True,
        )
        if self.templates_dir
        else None
    )
115
+
116
+ def _detect_content_type(self, text: str) -> str:
117
+ """Detect if content is HTML, Markdown, or plain text."""
118
+ text_stripped = text.strip()
119
+
120
+ # Simple HTML detection
121
+ if (text_stripped.startswith('<') and text_stripped.endswith('>')) or \
122
+ any(tag in text_stripped.lower() for tag in ['<html', '<div', '<p', '<h1']):
123
+ return 'html'
124
+
125
+ # Markdown detection
126
+ markdown_patterns = [
127
+ r'^#{1,6}\s', # Headers
128
+ r'^\*\s', # Bullet points
129
+ r'^\d+\.\s', # Numbered lists
130
+ r'\*\*.*?\*\*', # Bold
131
+ r'\*.*?\*', # Italic
132
+ r'`.*?`', # Code
133
+ r'\[.*?\]\(.*?\)', # Links
134
+ ]
135
+
136
+ for pattern in markdown_patterns:
137
+ if re.search(pattern, text_stripped, re.MULTILINE):
138
+ return 'markdown'
139
+
140
+ return 'markdown' # Default to markdown for processing
141
+
142
+ def _render_html_template(
143
+ self,
144
+ content: str,
145
+ template_name: Optional[str],
146
+ template_vars: Optional[Dict[str, Any]]
147
+ ) -> str:
148
+ """Render content through Jinja2 HTML template if provided."""
149
+ if not template_name or not self.html_env:
150
+ return content
151
+
152
+ try:
153
+ template = self.html_env.get_template(template_name)
154
+ vars_dict = template_vars or {}
155
+
156
+ # Add default variables
157
+ vars_dict.setdefault('content', content)
158
+ vars_dict.setdefault('date', self._get_current_date())
159
+ vars_dict.setdefault('timestamp', self._get_current_timestamp())
160
+
161
+ rendered = template.render(**vars_dict)
162
+ self.logger.info(
163
+ f"Rendered content through HTML template: {template_name}"
164
+ )
165
+ return rendered
166
+
167
+ except Exception as e:
168
+ self.logger.error(f"HTML template rendering failed: {e}")
169
+ return content
170
+
171
+ def _preprocess_markdown(self, text: str) -> str:
172
+ """Preprocess markdown to handle common issues."""
173
+ # Replace placeholder variables with empty strings
174
+ text = re.sub(r'\{[a-zA-Z0-9_]+\}', '', text)
175
+
176
+ # Handle f-strings that weren't evaluated
177
+ text = re.sub(r'f"""(.*?)"""', r'\1', text, flags=re.DOTALL)
178
+ text = re.sub(r"f'''(.*?)'''", r'\1', text, flags=re.DOTALL)
179
+
180
+ # Remove triple backticks and language indicators
181
+ text = re.sub(r'```[a-zA-Z]*\n', '', text)
182
+ text = re.sub(r'```', '', text)
183
+
184
+ # Fix heading issues (ensure space after #)
185
+ text = re.sub(r'(#+)([^ \n])', r'\1 \2', text)
186
+
187
+ # Fix escaped newlines if any
188
+ text = text.replace('\\n', '\n')
189
+
190
+ return text
191
+
192
def _markdown_to_html(self, markdown_text: str) -> str:
    """Convert markdown to HTML, degrading to plain paragraphs on failure.

    Args:
        markdown_text: Markdown source.

    Returns:
        HTML produced by the ``markdown`` package. If the conversion raises,
        the error is logged and each blank-line-separated block is wrapped
        in ``<p>`` with single newlines turned into ``<br>``.
    """
    try:
        return markdown.markdown(
            markdown_text,
            # 'toc' is deliberately left out to avoid issues
            extensions=['extra', 'codehilite', 'tables'],
        )
    except Exception as e:
        self.logger.error(f"Markdown conversion failed: {e}")

    # Fallback: wrap in paragraphs, skipping blocks that are only whitespace
    wrapped = []
    for block in markdown_text.split('\n\n'):
        if block.strip():
            wrapped.append('<p>' + block.replace('\n', '<br>') + '</p>')
    return '\n'.join(wrapped)
206
+
207
def _create_document(self, template_path: Optional[str] = None) -> Document:
    """Return a document loaded from a DOCX template, or a freshly styled one.

    Args:
        template_path: Optional template reference, resolved through
            ``self._get_template_path``.

    Returns:
        The template document when the resolved path exists; otherwise a new
        blank document with the basic styles applied.
    """
    resolved = self._get_template_path(template_path) if template_path else None
    if resolved and resolved.exists():
        self.logger.info(f"Loading DOCX template: {resolved}")
        return Document(str(resolved))

    # No usable template: create new document with basic styling
    blank = Document()
    self._setup_document_styles(blank)
    return blank
219
+
220
def _setup_document_styles(self, doc: Document) -> None:
    """Apply the default fonts to a document's built-in styles.

    ``Normal`` is set to Calibri 11pt. ``Heading 1`` to ``Heading 6`` are
    set to Calibri with sizes 16, 14, 12, 10, 8 and 6pt. Styles missing
    from the document are skipped. Any exception is logged and swallowed,
    so styling problems never abort document generation.

    Args:
        doc: Document whose styles are updated in place.
    """
    try:
        available = doc.styles

        # Body text
        if 'Normal' in available:
            body_font = available['Normal'].font
            body_font.name = 'Calibri'
            body_font.size = Pt(11)

        # Headings shrink by 2pt per level, starting at 16pt for level 1.
        for level in range(1, 7):
            style_name = f'Heading {level}'
            if style_name not in available:
                continue
            heading_font = available[style_name].font
            heading_font.name = 'Calibri'
            heading_font.size = Pt(18 - level * 2)

        self.logger.debug("Document styles configured successfully")

    except Exception as e:
        self.logger.error(f"Style setup failed: {e}")
243
+
244
def _apply_page_margins(self, doc: Document, margins: Dict[str, float]) -> None:
    """Set the margins of the document's first section.

    Only the keys ``top``, ``bottom``, ``left`` and ``right`` are read;
    each value is converted with ``Inches``. Sides that are absent keep
    their current margin. Any exception is logged and swallowed.

    Args:
        doc: Document to modify in place.
        margins: Mapping of side name to margin size.
    """
    try:
        first_section = doc.sections[0]

        for side in ('top', 'bottom', 'left', 'right'):
            if side in margins:
                setattr(first_section, f'{side}_margin', Inches(margins[side]))

        self.logger.debug(f"Applied page margins: {margins}")

    except Exception as e:
        self.logger.error(f"Failed to apply page margins: {e}")
262
+
263
def _html_to_docx(self, html_content: str, doc: Document) -> None:
    """Convert HTML content to DOCX document.

    The HTML is parsed with BeautifulSoup and every supported element
    (headings, ``p``, ``div``, ``ul``, ``ol``, ``table``, ``br``) is handed
    to ``_process_html_element`` in document order.

    ``find_all`` also returns matches nested inside other matches, and the
    element handlers already render the full text of their subtree, so
    each element is rendered at most once:

    * an element inside an already rendered element is skipped (for
      example ``<p>`` inside ``<li>``, or ``<br>`` inside ``<p>``);
    * a nested list is still rendered as its own list items, even though
      it sits inside a rendered list;
    * a ``div`` that contains other supported elements is treated as a
      pure wrapper: only its children are rendered. Text placed directly
      in such a wrapper, next to its block children, is not rendered.

    If anything raises, the error is logged and the raw ``html_content``
    is appended to the document as one plain paragraph.

    Args:
        html_content: HTML markup to convert.
        doc: Target document, modified in place.
    """
    structural_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'ul', 'ol', 'table']
    list_tags = ('ul', 'ol')

    try:
        soup = BeautifulSoup(html_content, 'html.parser')

        # id(tag) -> tag name, for every element whose subtree has already
        # been written to the document. find_all yields parents before
        # their descendants, so a parent is always recorded first.
        rendered = {}

        # Process each element in the HTML
        for element in soup.find_all(structural_tags + ['br']):
            enclosing = {
                rendered[id(ancestor)]
                for ancestor in element.parents
                if id(ancestor) in rendered
            }
            if element.name in list_tags:
                # Keep rendering nested lists as separate list items.
                enclosing.difference_update(list_tags)
            if enclosing:
                # Already written as part of an enclosing element.
                continue

            if element.name == 'div' and element.find(structural_tags) is not None:
                # Wrapper div: its block children are rendered on their own.
                continue

            self._process_html_element(element, doc)
            if element.name != 'br':
                rendered[id(element)] = element.name

    except Exception as e:
        self.logger.error(f"HTML to DOCX conversion failed: {e}")
        # Fallback: add as plain text
        doc.add_paragraph(html_content)
276
+
277
def _process_html_element(self, element, doc: Document) -> None:
    """Append the DOCX equivalent of one HTML element to the document.

    Handling by tag name (compared in lower case):

    * ``h1``-``h6``: a heading of the matching level, if it has text.
    * ``p`` / ``div``: a paragraph filled by ``_add_formatted_text``, if
      the element has text.
    * ``table``: delegated to ``_process_table``.
    * ``ul`` / ``ol``: one ``List Bullet`` / ``List Number`` paragraph per
      direct ``li`` child that has text.
    * ``br``: an empty paragraph.

    Any other tag is ignored.

    Args:
        element: Parsed HTML element to convert.
        doc: Target document, modified in place.
    """
    kind = element.name.lower()

    if kind in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
        title = self._get_text_content(element)
        if title.strip():
            # The digit in the tag name is the heading level.
            doc.add_heading(title, level=int(kind[1]))
        return

    if kind in ('p', 'div'):
        if self._get_text_content(element).strip():
            self._add_formatted_text(doc.add_paragraph(), element)
        return

    if kind == 'table':
        self._process_table(element, doc)
        return

    if kind in ('ul', 'ol'):
        item_style = 'List Bullet' if kind == 'ul' else 'List Number'
        for item in element.find_all('li', recursive=False):
            item_text = self._get_text_content(item)
            if item_text.strip():
                doc.add_paragraph(item_text, style=item_style)
        return

    if kind == 'br':
        doc.add_paragraph()
305
+
306
def _get_text_content(self, element) -> str:
    """Return the text contained in an HTML node.

    A ``NavigableString`` is returned as-is. For any other node the text
    of all children is concatenated in order (recursing into child
    elements) and the result is stripped of leading and trailing
    whitespace.

    Args:
        element: Parsed HTML node.

    Returns:
        The collected text.
    """
    if isinstance(element, NavigableString):
        return str(element)

    pieces = (
        str(child) if isinstance(child, NavigableString) else self._get_text_content(child)
        for child in element.contents
    )
    return ''.join(pieces).strip()
319
+
320
def _process_table(self, table_element, doc: Document) -> None:
    """Process HTML table and convert to DOCX table.

    One DOCX row is added per ``tr``; the table is as wide as the row with
    the most ``td``/``th`` cells, and shorter rows leave their trailing
    cells empty. Text in ``th`` cells is made bold. Nothing is added when
    the table has no rows or none of its rows has a cell.

    Args:
        table_element: Parsed ``table`` element.
        doc: Target document, modified in place.
    """
    rows = table_element.find_all('tr')
    if not rows:
        return

    # Look up each row's cells once and reuse them for sizing and filling.
    cells_per_row = [row.find_all(['td', 'th']) for row in rows]

    # Create table with appropriate dimensions
    max_cols = max(len(cells) for cells in cells_per_row)
    if max_cols == 0:
        # Rows without any cell would yield a table with zero columns.
        return

    table = doc.add_table(rows=0, cols=max_cols)
    table.style = 'Table Grid'

    for cells in cells_per_row:
        # Read .cells once per row instead of once per cell.
        docx_cells = table.add_row().cells

        for docx_cell, cell in zip(docx_cells, cells):
            docx_cell.text = self._get_text_content(cell)

            # Make header cells bold
            if cell.name == 'th':
                for paragraph in docx_cell.paragraphs:
                    for run in paragraph.runs:
                        run.bold = True
345
+
346
def _add_formatted_text(self, paragraph, element) -> None:
    """Fill a paragraph with the content of an HTML node, one run per child.

    A bare ``NavigableString`` becomes a single run. Otherwise each direct
    child becomes one run: text nodes verbatim, child elements as their
    text from ``_get_text_content``. Only the direct child's own tag
    decides the formatting: ``strong``/``b`` is bold, ``em``/``i`` is
    italic, ``code`` uses Courier New at 10pt.

    Args:
        paragraph: DOCX paragraph that receives the runs.
        element: Parsed HTML node to read from.
    """
    if isinstance(element, NavigableString):
        paragraph.add_run(str(element))
        return

    for node in element.contents:
        if isinstance(node, NavigableString):
            paragraph.add_run(str(node))
            continue

        run = paragraph.add_run(self._get_text_content(node))

        # Apply basic formatting based on HTML tags
        tag = getattr(node, 'name', None)
        if tag in ('strong', 'b'):
            run.bold = True
        elif tag in ('em', 'i'):
            run.italic = True
        elif tag == 'code':
            run.font.name = 'Courier New'
            run.font.size = Pt(10)
368
+
369
async def _generate_document_content(self, content: str, **kwargs) -> bytes:
    """
    Build a Word document from the given content and return it as bytes.

    The content is first passed through ``_render_html_template``, then its
    type is detected. HTML is converted directly; anything else is cleaned
    with ``_preprocess_markdown``, rendered with ``_markdown_to_html`` and
    then converted.

    Args:
        content: Input content (text, markdown, or HTML)
        **kwargs: Additional arguments from MSWordArgs. The keys read here
            are ``template_name``, ``template_vars``, ``docx_template``
            and ``page_margins``; ``style_config`` is not applied by this
            method.

    Returns:
        DOCX document as bytes

    Raises:
        Exception: Any error raised while building the document is logged
            and re-raised unchanged.
    """
    try:
        # Process content through HTML template if provided
        rendered = self._render_html_template(
            content,
            kwargs.get('template_name'),
            kwargs.get('template_vars'),
        )

        # Detect content type
        content_type = self._detect_content_type(rendered)
        self.logger.info(f"Detected content type: {content_type}")

        # Create DOCX document
        doc = self._create_document(kwargs.get('docx_template'))

        # Apply page margins if specified
        margins = kwargs.get('page_margins')
        if margins:
            self._apply_page_margins(doc, margins)

        # Markdown and plain text go through HTML first.
        if content_type == 'html':
            html_body = rendered
        else:
            html_body = self._markdown_to_html(self._preprocess_markdown(rendered))
        self._html_to_docx(html_body, doc)

        # Save document to bytes
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    except Exception as e:
        self.logger.error(f"Error generating Word document: {e}")
        raise
421
+
422
async def _execute(
    self,
    content: str,
    output_filename: Optional[str] = None,
    file_prefix: str = "document",
    output_dir: Optional[str] = None,
    overwrite_existing: bool = False,
    template_name: Optional[str] = None,
    template_vars: Optional[Dict[str, Any]] = None,
    docx_template: Optional[str] = None,
    style_config: Optional[Dict[str, Any]] = None,
    page_margins: Optional[Dict[str, float]] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Generate a Word document from content (AbstractTool interface).

    All named arguments are forwarded to ``_create_document_safely`` with
    the extension fixed to ``docx``. Overwriting is enabled when either
    the argument or the tool's own ``overwrite_existing`` is truthy.
    Extra ``**kwargs`` are accepted but not forwarded.

    Args:
        content: Content to convert to Word document
        output_filename: Custom filename (without extension)
        file_prefix: Prefix for auto-generated filenames
        output_dir: Custom output directory
        overwrite_existing: Whether to overwrite existing files
        template_name: HTML template name for content processing
        template_vars: Variables for HTML template
        docx_template: DOCX template file path
        style_config: Custom styling configuration
        page_margins: Page margins configuration
        **kwargs: Additional arguments

    Returns:
        Dictionary with document generation results

    Raises:
        Exception: Any error is logged and re-raised unchanged.
    """
    try:
        self.logger.info(
            f"Starting Word document generation with {len(content)} characters of content"
        )

        # Use the safe document creation workflow
        request = {
            'content': content,
            'output_filename': output_filename,
            'file_prefix': file_prefix,
            'output_dir': output_dir,
            'overwrite_existing': overwrite_existing or self.overwrite_existing,
            'extension': "docx",
            'template_name': template_name,
            'template_vars': template_vars,
            'docx_template': docx_template,
            'style_config': style_config,
            'page_margins': page_margins,
        }
        result = await self._create_document_safely(**request)

        if result['status'] == 'success':
            created_name = result['metadata']['filename']
            self.logger.info(
                f"Word document created successfully: {created_name}"
            )

        return result

    except Exception as e:
        self.logger.error(f"Error in Word document generation: {e}")
        raise
485
+
486
+
487
+ class WordToMarkdownTool(AbstractDocumentTool):
488
+ """
489
+ Tool for converting Word documents to Markdown format.
490
+
491
+ This tool downloads Word documents from URLs and converts them to Markdown
492
+ format for easier processing by LLMs and other text analysis tools.
493
+ """
494
+
495
+ name = "word_to_markdown"
496
+ description = (
497
+ "Convert Word documents to Markdown format from URLs. "
498
+ "Downloads Word documents and converts them to clean Markdown text. "
499
+ "Useful for processing and analyzing Word documents."
500
+ )
501
+
502
+ # Document type configuration
503
+ document_type = "conversion"
504
+ default_extension = "md"
505
+ supported_extensions = [".md", ".txt"]
506
+
507
def __init__(self, **kwargs):
    """Set up the tool; all keyword arguments go to the parent class."""
    super().__init__(**kwargs)
    # Download directory; None until a download creates one.
    self._temp_dir: Optional[str] = None
511
+
512
async def _download_file(self, url: str) -> str:
    """Fetch a Word document over HTTP into the tool's temporary directory.

    The temporary directory is created on first use. The local file name
    is the last path component of the URL, with ``.docx`` appended unless
    it already ends in ``.docx`` or ``.doc``.

    Args:
        url: Location of the Word document.

    Returns:
        Path of the downloaded file.

    Raises:
        Exception: If the response status is not 200.
    """
    # Create temporary directory if needed
    if not self._temp_dir:
        self._temp_dir = tempfile.mkdtemp()

    # Generate filename from URL
    filename = os.path.basename(urlparse(url).path)
    has_word_suffix = filename.endswith(('.docx', '.doc'))
    if not has_word_suffix:
        filename = filename + '.docx'

    file_path = os.path.join(self._temp_dir, filename)

    # Download file
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        if response.status != 200:
            raise Exception(f"Download failed with status {response.status}")

        async with aiofiles.open(file_path, 'wb') as target:
            await target.write(await response.read())

    self.logger.info(f"Downloaded Word document: {filename}")
    return file_path
537
+
538
async def _convert_to_markdown(self, file_path: str) -> str:
    """Turn a Word file into Markdown text.

    The file is converted to HTML with mammoth and the HTML is then passed
    to ``md``. Messages reported by mammoth are placed at the top of the
    result, one ``<!-- Warning: ... -->`` comment per message.

    Args:
        file_path: Path of the Word document to read.

    Returns:
        The Markdown text.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        with open(file_path, "rb") as docx_file:
            conversion = mammoth.convert_to_html(docx_file)

        markdown_text = md(conversion.value)

        # Add conversion warnings as comments
        notes = conversion.messages
        if notes:
            header = "\n".join(f"<!-- Warning: {note} -->" for note in notes)
            markdown_text = f"{header}\n\n{markdown_text}"

        return markdown_text

    except Exception as e:
        self.logger.error(f"Conversion to markdown failed: {e}")
        raise
556
+
557
async def _cleanup_temp_files(self, file_path: Optional[str] = None) -> None:
    """Delete a downloaded file and, once empty, the temporary directory.

    The directory is removed (and ``self._temp_dir`` reset to ``None``)
    only when it no longer contains any entries. Failures are logged as
    warnings and never raised.

    Args:
        file_path: File to delete first, if it is given and exists.
    """
    try:
        if file_path and os.path.exists(file_path):
            os.remove(file_path)

        temp_dir = self._temp_dir
        if not temp_dir or not os.path.exists(temp_dir):
            return
        if os.listdir(temp_dir):
            # Other files are still in there; leave the directory alone.
            return

        os.rmdir(temp_dir)
        self._temp_dir = None

    except Exception as e:
        self.logger.warning(f"Cleanup failed: {e}")
570
+
571
async def convert_from_url(self, url: str, save_markdown: bool = False, **kwargs) -> Dict[str, Any]:
    """
    Download a Word document and convert it to Markdown.

    Errors are not raised: any failure is logged and reported through an
    ``"error"`` result dictionary. The downloaded temporary file is
    cleaned up in every case.

    Args:
        url: URL of the Word document
        save_markdown: Whether to save the markdown to a file
        **kwargs: Additional arguments for file saving, passed on to
            ``_create_document_safely``

    Returns:
        Dictionary with conversion results. On success it holds
        ``status``, ``markdown_content``, ``source_url``,
        ``content_length`` and ``message``, plus ``saved_file``,
        ``file_path`` and ``file_url`` when the markdown was saved
        successfully. On failure it holds ``status``, ``error``,
        ``source_url`` and ``message``.
    """
    file_path = None
    try:
        # Download, then convert
        file_path = await self._download_file(url)
        markdown_content = await self._convert_to_markdown(file_path)

        result = {
            "status": "success",
            "markdown_content": markdown_content,
            "source_url": url,
            "content_length": len(markdown_content),
            "message": "Word document converted to Markdown successfully"
        }

        if not save_markdown:
            return result

        # Optionally save markdown to file
        file_result = await self._create_document_safely(
            content=markdown_content,
            extension="md",
            **kwargs
        )
        if file_result['status'] == 'success':
            metadata = file_result['metadata']
            saved_info = {
                "saved_file": metadata,
                "file_path": metadata['file_path'],
                "file_url": metadata['file_url']
            }
            result.update(saved_info)

        return result

    except Exception as e:
        self.logger.error(f"Word to Markdown conversion failed: {e}")
        return {
            "status": "error",
            "error": str(e),
            "source_url": url,
            "message": f"Failed to convert Word document: {str(e)}"
        }

    finally:
        # Clean up temporary files
        await self._cleanup_temp_files(file_path)
627
+
628
async def _generate_document_content(self, content: str, **kwargs) -> str:
    """Return ``content`` unchanged (hook required by AbstractDocumentTool).

    This tool's real work is the URL conversion; the text handed in here
    is already the markdown to store. Extra keyword arguments are ignored.
    """
    return content
632
+
633
async def _execute(self, url: str, save_markdown: bool = False, **kwargs) -> Dict[str, Any]:
    """Run the conversion by delegating to ``convert_from_url``.

    Args:
        url: URL of the Word document.
        save_markdown: Whether the markdown should also be saved to a file.
        **kwargs: Passed through to ``convert_from_url``.

    Returns:
        The result dictionary produced by ``convert_from_url``.
    """
    outcome = await self.convert_from_url(url, save_markdown, **kwargs)
    return outcome