ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,316 @@
1
+ from typing import List, Optional, Union, Tuple, Dict
2
+ from pathlib import PurePath
3
+ from collections.abc import Callable
4
+ from ..stores.models import Document
5
+ from .abstract import AbstractLoader
6
+
7
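+ # Dependencies: ebooklib and beautifulsoup4 are required by EpubLoader (its constructor raises ImportError without them); markdownify is optional (plain-text fallback). Install via
8
+ # pip install ebooklib beautifulsoup4 markdownify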
9
+ try:
10
+ from ebooklib import epub
11
+ EBOOKLIB_AVAILABLE = True
12
+ try:
13
+ ITEM_DOCUMENT = epub.ITEM_DOCUMENT
14
+ except AttributeError:
15
+ try:
16
+ from ebooklib.epub import ITEM_DOCUMENT
17
+ except ImportError:
18
+ ITEM_DOCUMENT = 9 # Known constant value
19
+ except Exception:
20
+ EBOOKLIB_AVAILABLE = False
21
+
22
+ try:
23
+ from bs4 import BeautifulSoup
24
+ BS4_AVAILABLE = True
25
+ except Exception:
26
+ BS4_AVAILABLE = False
27
+
28
+ try:
29
+ from markdownify import MarkdownConverter
30
+ MD_AVAILABLE = True
31
+ except Exception:
32
+ MD_AVAILABLE = False
33
+
34
+
35
class EpubLoader(AbstractLoader):
    """
    EPUB loader that extracts clean Markdown (or plain text) from chapters/sections.

    Features:
    - Per-chapter documents with titles from TOC/HTML
    - Optional full-book document (merged)
    - Clean Markdown conversion (lists, headers, links)
    - Skips non-document items (css, images, fonts)
    - Configurable minimum content length

    ``ebooklib`` and ``beautifulsoup4`` are required (the constructor raises
    ``ImportError`` when either is missing). ``markdownify`` is optional: when
    it is not installed the loader emits plain text even if ``as_markdown`` is
    True, and the ``output_format`` metadata reports ``"text"`` accordingly.
    """

    extensions: List[str] = ['.epub']

    def __init__(
        self,
        source: Optional[Union[str, PurePath, List[PurePath]]] = None,
        *,
        tokenizer: Optional[Union[str, Callable]] = None,
        text_splitter: Optional[Union[str, Callable]] = None,
        source_type: str = 'file',

        # Output controls
        as_markdown: bool = True,            # emit markdown instead of plain text
        per_chapter: bool = True,            # True => one Document per chapter; False => single full-book doc
        include_toc_document: bool = False,  # optional separate TOC document
        min_section_length: int = 50,        # drop tiny/empty sections

        # Markdown conversion tuning
        heading_style: str = "ATX",          # for markdownify; "ATX" => # Heading
        strip_whitespace: bool = True,

        **kwargs
    ):
        """
        Configure the loader.

        Args:
            source: File path(s) forwarded to the base loader.
            tokenizer: Forwarded to the base loader.
            text_splitter: Forwarded to the base loader.
            source_type: Forwarded to the base loader.
            as_markdown: Convert chapters to Markdown (needs markdownify);
                otherwise extract plain text.
            per_chapter: Emit one Document per section when True, a single
                merged Document when False.
            include_toc_document: Also emit a Document listing the TOC entries.
            min_section_length: Sections whose cleaned content is shorter than
                this many characters are dropped.
            heading_style: Heading style passed to markdownify.
            strip_whitespace: Trim trailing spaces and collapse runs of blank
                lines in the extracted content.
            **kwargs: Forwarded to the base loader.

        Raises:
            ImportError: If ebooklib or beautifulsoup4 is not installed.
        """
        super().__init__(
            source,
            tokenizer=tokenizer,
            text_splitter=text_splitter,
            source_type=source_type,
            **kwargs
        )
        self.doctype = 'epub'
        # Always tagged as 'ebook' here, independent of the source_type
        # argument (which is still forwarded to the base class above).
        self._source_type = 'ebook'

        # Options
        self.as_markdown = as_markdown
        self.per_chapter = per_chapter
        self.include_toc_document = include_toc_document
        self.min_section_length = int(min_section_length)
        self.strip_whitespace = bool(strip_whitespace)
        self.heading_style = heading_style

        # sanity checks
        if not EBOOKLIB_AVAILABLE or not BS4_AVAILABLE:
            missing = []
            if not EBOOKLIB_AVAILABLE:
                missing.append("ebooklib")
            if not BS4_AVAILABLE:
                missing.append("beautifulsoup4")
            raise ImportError(
                f"EpubLoader requires {', '.join(missing)}. "
                "Install with: pip install ebooklib beautifulsoup4"
            )

    def _output_format(self) -> str:
        """Return the format actually produced: 'markdown' or 'text'."""
        # Markdown is only produced when it was requested AND markdownify
        # could be imported; otherwise _html_to_markdown falls back to text.
        return "markdown" if (self.as_markdown and MD_AVAILABLE) else "text"

    def _html_to_markdown(self, html: str) -> str:
        """Convert XHTML chapter html to Markdown (fallback to plain text)."""
        soup = BeautifulSoup(html, "html.parser")

        # remove scripts/styles
        for bad in soup(["script", "style", "noscript"]):
            bad.decompose()

        if MD_AVAILABLE and self.as_markdown:
            md = MarkdownConverter(
                heading_style=self.heading_style,
                strip=['style', 'script', 'noscript']
            ).convert_soup(soup)
            return self._clean(md)

        # plain text fallback
        text = soup.get_text("\n", strip=True)
        return self._clean(text)

    def _clean(self, text: str) -> str:
        """
        Normalize whitespace in extracted content.

        When ``strip_whitespace`` is enabled, trailing spaces are removed from
        every line and each run of consecutive blank lines is collapsed to a
        single blank line. Leading/trailing whitespace of the whole text is
        always stripped. Empty/None input yields an empty string.
        """
        if not text:
            return ""
        if self.strip_whitespace:
            # Trim trailing spaces on every line
            lines = [ln.rstrip() for ln in text.splitlines()]
            # Collapse any run of blank lines to a single blank line
            cleaned = []
            blank = 0
            for ln in lines:
                if ln.strip():
                    blank = 0
                    cleaned.append(ln)
                else:
                    blank += 1
                    if blank <= 1:
                        cleaned.append("")
            text = "\n".join(cleaned)
        return text.strip()

    def _flatten_toc(self, toc) -> List[Tuple[str, str]]:
        """
        Flatten ebooklib TOC into a list of (href, title) entries.
        toc entries are like: Link(title, href) or nested lists/tuples.

        Entries without a string href or without a title are skipped; URL
        fragments (``#anchor``) are removed from the href.
        """
        flat: List[Tuple[str, str]] = []

        def _walk(node):
            if isinstance(node, (list, tuple)):
                for child in node:
                    _walk(child)
                return
            # epub.Link or epub.Section
            href = getattr(node, "href", None)
            title = getattr(node, "title", None)
            # Explicit type guard instead of swallowing every exception.
            if isinstance(href, str) and href and title:
                flat.append((href.split("#", 1)[0], str(title)))

        _walk(toc)
        return flat

    def _toc_title_lookup(self, book: "epub.EpubBook") -> Dict[str, str]:
        """
        Build a mapping from href→title using TOC (best effort).
        Keys are hrefs without fragments; values are strings.
        """
        try:
            flat = self._flatten_toc(book.toc or [])
            # Normalize: keep last title if duplicates
            return {href: title for href, title in flat}
        except Exception:
            # Best effort: a malformed TOC must not prevent loading the book.
            return {}

    def _iter_document_items(self, book: "epub.EpubBook"):
        """
        Yield (order_idx, item) for spine items that are HTML documents.

        Spine entries may be ``(idref, linear)`` tuples or bare idref strings.
        If the spine yields no document items, fall back to every item of
        type ITEM_DOCUMENT in the order the book reports them.
        """
        id_to_item = {it.get_id(): it for it in book.get_items()}
        order = 0
        for entry in (book.spine or []):
            if isinstance(entry, str):
                idref = entry
            elif isinstance(entry, tuple) and entry and isinstance(entry[0], str):
                idref = entry[0]
            else:
                continue
            item = id_to_item.get(idref)
            if item is None or item.get_type() != ITEM_DOCUMENT:
                continue
            yield order, item
            order += 1

        if order == 0:
            yield from enumerate(book.get_items_of_type(ITEM_DOCUMENT))

    def _derive_title_from_html(self, html: str) -> Optional[str]:
        """Return the <title> text, else the first h1/h2/h3 text, else None."""
        soup = BeautifulSoup(html, "html.parser")
        # Try <title>
        if soup.title and soup.title.string:
            t = soup.title.string.strip()
            if t:
                return t
        # Try first heading
        for tag in ["h1", "h2", "h3"]:
            h = soup.find(tag)
            if h and h.get_text(strip=True):
                return h.get_text(strip=True)
        return None

    async def _load(self, path: PurePath, **kwargs) -> List[Document]:
        """
        Load an EPUB file into Parrot Documents.

        Returns:
        - Per-chapter Documents (default), or
        - Single full-book Document if per_chapter=False

        A TOC Document is prepended when ``include_toc_document`` is set and
        the book has TOC entries. If the file cannot be opened an error is
        logged and an empty list is returned.
        """
        self.logger.info(f"Loading EPUB file: {path}")

        docs: List[Document] = []
        try:
            book = epub.read_epub(str(path))
        except Exception as e:
            self.logger.error(f"Failed to open EPUB {path}: {e}")
            return docs

        toc_map = self._toc_title_lookup(book)
        output_format = self._output_format()
        file_name = path.name if hasattr(path, 'name') else str(path)

        # Optionally create a separate TOC document
        if self.include_toc_document and toc_map:
            toc_lines = ["# Table of Contents"]
            for href, title in toc_map.items():
                toc_lines.append(f"- {title} (Link: {href})")
            toc_content = "\n".join(toc_lines)
            toc_meta = self.create_metadata(
                path=path,
                doctype="epub",
                source_type="epub_toc",
                doc_metadata={
                    "content_type": "toc",
                    "entries": len(toc_map)
                },
            )
            docs.append(self.create_document(toc_content, path, toc_meta))

        # Collect per-chapter or full text
        all_sections = []
        for order_idx, item in self._iter_document_items(book):
            try:
                html = item.get_content().decode("utf-8", errors="ignore")
            except Exception as e:
                # Keep loading the rest of the book, but leave a trace of
                # the section that was dropped.
                self.logger.warning(
                    f"Skipping unreadable EPUB item "
                    f"{getattr(item, 'file_name', '?')} in {path}: {e}"
                )
                continue

            content = self._html_to_markdown(html)

            if len(content) < self.min_section_length:
                # skip boilerplate/empty stubs
                continue

            # Derive title from TOC → HTML <title> → filename
            href = getattr(item, "file_name", "") or ""
            title = (
                toc_map.get(href)
                or self._derive_title_from_html(html)
                or PurePath(href).name
                or f"Section {order_idx+1}"
            )

            # Track for full-book option
            all_sections.append((order_idx, title, content, href))

            # Per-chapter Document
            if self.per_chapter:
                section_meta = self.create_metadata(
                    path=path,
                    doctype="epub",
                    source_type="epub_section",
                    doc_metadata={
                        "section_order": order_idx + 1,
                        "section_title": title,
                        "href": href,
                        "content_type": "chapter",
                        "output_format": output_format,
                        "min_section_length": self.min_section_length
                    },
                )

                # Prepend a lightweight context header (like your PPT/PDF style)
                context = [
                    f"File Name: {file_name}",
                    f"Section: {order_idx + 1}",
                    f"Title: {title}",
                    "Document Type: epub",
                    "Source Type: ebook",
                ]
                full_content = "\n".join(context) + "\n======\n\n" + content

                docs.append(self.create_document(full_content, path, section_meta))

        if not all_sections:
            self.logger.warning(f"No textual sections extracted from {path}")
            return docs

        # Full-book Document (if requested)
        if not self.per_chapter:
            merged = [
                f"# {title}\n\n{content}\n"
                for _, title, content, _ in all_sections
            ]
            book_text = "\n\n".join(merged).strip()

            full_meta = self.create_metadata(
                path=path,
                doctype="epub",
                source_type="epub_full",
                doc_metadata={
                    "sections": len(all_sections),
                    "content_type": "full_document",
                    "output_format": output_format,
                },
            )
            docs.append(self.create_document(book_text, path, full_meta))

        return docs
@@ -0,0 +1,199 @@
1
+ # parrot/loaders/excel.py
2
+ from typing import List, Optional, Union, Literal, Dict
3
+ from pathlib import PurePath
4
+ from collections.abc import Callable
5
+ import pandas as pd
6
+ from navigator.libs.json import JSONContent
7
+ from ..stores.models import Document
8
+ from .abstract import AbstractLoader
9
+
10
+
11
+ class ExcelLoader(AbstractLoader):
12
+ """
13
+ Excel loader that converts an Excel workbook (or DataFrame) into per-row Documents.
14
+
15
+ - One Document per row per sheet (rows with all-empty values are skipped).
16
+ - Works for .xlsx / .xlsm / .xls files (pandas engine auto-detects).
17
+ - Also accepts a pandas.DataFrame (sheet='DataFrame').
18
+ - Output formats: markdown (default), plain, or json.
19
+ """
20
+
21
+ extensions: List[str] = ['.xlsx', '.xlsm', '.xls']
22
+
23
+ def __init__(
24
+ self,
25
+ source: Optional[Union[str, PurePath, List[PurePath]]] = None,
26
+ *,
27
+ tokenizer: Union[str, Callable] = None,
28
+ text_splitter: Union[str, Callable] = None,
29
+ source_type: str = 'file',
30
+
31
+ sheets: Optional[Union[str, int, List[Union[str, int]]]] = None,
32
+ header: Union[int, List[int], None] = 0,
33
+ usecols: Optional[Union[str, List[Union[int, str]]]] = None,
34
+ drop_empty_rows: bool = True,
35
+ max_rows: Optional[int] = None,
36
+ date_format: str = "%Y-%m-%d",
37
+ output_format: Literal["markdown", "plain", "json"] = "markdown",
38
+ min_row_length: int = 1, # skip rows with < N non-empty fields
39
+ title_column: Optional[str] = None,
40
+
41
+ **kwargs
42
+ ):
43
+ super().__init__(
44
+ source,
45
+ tokenizer=tokenizer,
46
+ text_splitter=text_splitter,
47
+ source_type=source_type,
48
+ **kwargs
49
+ )
50
+ self.doctype = 'excel'
51
+ self._source_type = source_type
52
+ self.sheets = sheets
53
+ self.header = header
54
+ self.usecols = usecols
55
+ self.drop_empty_rows = drop_empty_rows
56
+ self.max_rows = max_rows
57
+ self.date_format = date_format
58
+ self.output_format = output_format
59
+ self.min_row_length = int(min_row_length)
60
+ self.title_column = title_column
61
+
62
+ def _stringify(self, v):
63
+ if pd.isna(v):
64
+ return ""
65
+ if isinstance(v, (pd.Timestamp, )):
66
+ return v.strftime(self.date_format)
67
+ return str(v)
68
+
69
+ def _row_to_text(self, row: Dict[str, object]) -> str:
70
+ """Render a single row dict to text in the chosen output_format."""
71
+ if self.output_format == "json":
72
+ return JSONContent.dumps(row, indent=2)
73
+
74
+ items = [(k, self._stringify(v)) for k, v in row.items()]
75
+ if self.output_format == "plain":
76
+ # key: value per line
77
+ return "\n".join(f"{k}: {v}" for k, v in items if v != "")
78
+
79
+ # markdown: list of **key**: value
80
+ return "\n".join(f"- **{k}**: {v}" for k, v in items if v != "")
81
+
82
+ def _row_nonempty_count(self, row: Dict[str, object]) -> int:
83
+ return sum(1 for v in row.values() if (not pd.isna(v)) and str(v).strip() != "")
84
+
85
+ async def _load(self, source: Union[PurePath, str, pd.DataFrame], **kwargs) -> List[Document]:
86
+ """
87
+ Load an Excel file (or DataFrame) and return per-row Documents.
88
+ """
89
+ docs: List[Document] = []
90
+
91
+ # Case A: already a DataFrame (from AbstractLoader.from_dataframe)
92
+ # (sheet name is synthetic: "DataFrame")
93
+ if isinstance(source, pd.DataFrame):
94
+ sheet_name = "DataFrame"
95
+ docs.extend(await self._docs_from_dataframe(source, sheet_name, path_hint="dataframe"))
96
+ return docs
97
+
98
+ # Case B: excel path
99
+ path = PurePath(source) if not isinstance(source, PurePath) else source
100
+ self.logger.info(f"Loading Excel file: {path}")
101
+
102
+ # Read one or multiple sheets
103
+ try:
104
+ # If sheets=None -> pd returns dict of DataFrames (all sheets)
105
+ # If sheets is a single name/index -> returns a DataFrame
106
+ xls = pd.read_excel(
107
+ str(path),
108
+ sheet_name=self.sheets if self.sheets is not None else None,
109
+ header=self.header,
110
+ usecols=self.usecols,
111
+ dtype=object # keep as objects → stringify ourselves
112
+ )
113
+ except Exception as e:
114
+ self.logger.error(f"Failed to read Excel {path}: {e}")
115
+ return docs
116
+
117
+ # Normalize to dict[str, DataFrame]
118
+ if isinstance(xls, pd.DataFrame):
119
+ frames = {"Sheet1" if self.sheets is None else str(self.sheets): xls}
120
+ else:
121
+ # dict of {sheet_name: df}
122
+ frames = {str(k): v for k, v in xls.items()}
123
+
124
+ for sheet_name, df in frames.items():
125
+ # Drop fully empty rows
126
+ if self.drop_empty_rows:
127
+ df = df.dropna(how="all")
128
+
129
+ if self.max_rows is not None:
130
+ df = df.head(self.max_rows)
131
+
132
+ if df.empty:
133
+ self.logger.info(f"Sheet '{sheet_name}' is empty; skipping.")
134
+ continue
135
+
136
+ # Ensure columns are strings
137
+ df.columns = [str(c) for c in df.columns]
138
+ docs.extend(await self._docs_from_dataframe(df, sheet_name, path_hint=path))
139
+
140
+ return docs
141
+
142
+ async def _docs_from_dataframe(
143
+ self,
144
+ df: pd.DataFrame,
145
+ sheet_name: str,
146
+ path_hint: Union[str, PurePath]
147
+ ) -> List[Document]:
148
+ """Convert a DataFrame into per-row Documents."""
149
+ docs: List[Document] = []
150
+
151
+ # Convert to records for easy iteration
152
+ records = df.to_dict(orient="records")
153
+
154
+ for i, row in enumerate(records, start=1):
155
+ if self.min_row_length > 1 and self._row_nonempty_count(row) < self.min_row_length:
156
+ continue
157
+
158
+ content_body = self._row_to_text(row)
159
+
160
+ # Context header (aligns with PDF/PPT style: header + "======")
161
+ title_val = None
162
+ if self.title_column and self.title_column in row:
163
+ title_val = self._stringify(row[self.title_column]).strip() or None
164
+
165
+ context = [
166
+ f"File Name: {path_hint.name if hasattr(path_hint, 'name') else str(path_hint)}",
167
+ f"Sheet: {sheet_name}",
168
+ f"Row: {i}",
169
+ f"Document Type: excel",
170
+ f"Source Type: {self._source_type}",
171
+ ]
172
+ if title_val:
173
+ context.append(f"Title: {title_val}")
174
+
175
+ full_content = "\n".join(context) + "\n======\n\n" + content_body
176
+
177
+ # Metadata
178
+ doc_meta = {
179
+ "filename": path_hint.name if hasattr(path_hint, 'name') else str(path_hint),
180
+ "file_path": str(path_hint),
181
+ "sheet": sheet_name,
182
+ "row_index": i,
183
+ "columns": list(df.columns),
184
+ "content_type": "row",
185
+ "output_format": self.output_format,
186
+ }
187
+
188
+ meta = self.create_metadata(
189
+ path=path_hint,
190
+ doctype="excel",
191
+ source_type="excel_row",
192
+ doc_metadata=doc_meta,
193
+ )
194
+
195
+ docs.append(
196
+ self.create_document(full_content, path_hint, meta)
197
+ )
198
+
199
+ return docs
@@ -0,0 +1,55 @@
1
+ ####
2
+ # Copyright 2023 Jesus Lara.
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ #
5
+ # Loaders.
6
+ # Open, extract and load data from different sources.
7
+ #####
8
+ from .pdf import PDFLoader
9
+ from .txt import TextLoader
10
+ from .docx import MSWordLoader
11
+ from .qa import QAFileLoader
12
+ from .html import HTMLLoader
13
+ from .pdfmark import PDFMarkdownLoader
14
+ from .pdftables import PDFTablesLoader
15
+ from .csv import CSVLoader
16
+ from .youtube import YoutubeLoader
17
+ from .web import WebLoader
18
+ from .ppt import PowerPointLoader
19
+ from .markdown import MarkdownLoader
20
+ from .epubloader import EpubLoader
21
+ from .excel import ExcelLoader
22
+ # from .video import VideoLoader
23
+ from .videolocal import VideoLocalLoader
24
+ from .videounderstanding import VideoUnderstandingLoader
25
+ # from .vimeo import VimeoLoader
26
+ from .audio import AudioLoader
27
+
28
+ AVAILABLE_LOADERS = {
29
+ '.pdf': PDFLoader,
30
+ '.txt': TextLoader,
31
+ '.docx': MSWordLoader,
32
+ '.qa': QAFileLoader,
33
+ '.xlsx': ExcelLoader,
34
+ '.xlsm': ExcelLoader,
35
+ '.xls': ExcelLoader,
36
+ '.html': HTMLLoader,
37
+ '.pdfmd': PDFMarkdownLoader,
38
+ '.pdftables': PDFTablesLoader,
39
+ '.csv': CSVLoader,
40
+ '.youtube': YoutubeLoader,
41
+ '.web': WebLoader,
42
+ '.ppt': PowerPointLoader,
43
+ '.pptx': PowerPointLoader,
44
+ '.md': MarkdownLoader,
45
+ '.json': MarkdownLoader,
46
+ '.xml': MarkdownLoader,
47
+ '.epub': EpubLoader,
48
+ '.mp3': AudioLoader,
49
+ '.wav': AudioLoader,
50
+ '.avi': VideoUnderstandingLoader,
51
+ '.mp4': VideoUnderstandingLoader,
52
+ '.webm': VideoUnderstandingLoader,
53
+ '.mov': VideoUnderstandingLoader,
54
+ '.mkv': VideoUnderstandingLoader,
55
+ }
File without changes
@@ -0,0 +1,39 @@
1
+ from typing import Optional, Any
2
+ from abc import ABC, abstractmethod
3
+ from navconfig.logging import logging
4
+
5
+
6
+ class FilePlugin(ABC):
7
+ """
8
+ FilePlugin is a base class for Open Files.
9
+ It provides a common interface for all opening all kind of iles.
10
+ Subclasses should implement the `open` method to define
11
+ the specific file processing logic.
12
+ """
13
+
14
+ def __init__(self, *args, **kwargs):
15
+ """
16
+ Initialize the ImagePlugin with an optional image path.
17
+
18
+ :param image: Path to the image file.
19
+ """
20
+ self.logger = logging.getLogger(
21
+ f'parrot.FileLoader.{self.__class__.__name__}'
22
+ )
23
+
24
+ async def __aenter__(self):
25
+ if hasattr(self, "open"):
26
+ await self.open()
27
+ return self
28
+
29
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
30
+ if hasattr(self, "close"):
31
+ await self.close()
32
+ return True
33
+
34
+ @abstractmethod
35
+ async def read(self):
36
+ """
37
+ Return the content of the file, need to be implemented in the subclass.
38
+ """
39
+ pass
@@ -0,0 +1,26 @@
1
+ from bs4 import BeautifulSoup
2
+ from .text import TextFile
3
+
4
+ class HTMLFile(TextFile):
5
+ """
6
+ A class to handle HTML files asynchronously.
7
+ """
8
+ async def read(self) -> str:
9
+ """
10
+ Asynchronously read the content of the html file.
11
+
12
+ Returns:
13
+ BeautifulSoup object of HTML File.
14
+ """
15
+ if self._file is None:
16
+ await self.open()
17
+
18
+ try:
19
+ content = await self._file.read()
20
+ soup = BeautifulSoup(content, 'html.parser')
21
+ return soup, content
22
+ except Exception as e:
23
+ self.logger.error(
24
+ f"Error reading HTML file {self.path}: {str(e)}"
25
+ )
26
+ raise