ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,517 @@
1
+ """
2
+ ScrapingOrchestrator for AI-Parrot
3
+ Complete integration layer that coordinates LLM-directed web scraping
4
+ """
5
+ from typing import Dict, List, Any, Optional, Union, Callable
6
+ import asyncio
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ from ...bots.scraper import ScrapingAgent
13
+ from .tool import WebScrapingTool
14
+ from .models import ScrapingStep, ScrapingSelector, ScrapingResult
15
+ from ...stores.kb import KnowledgeBaseStore
16
+ from ...loaders.text import TextLoader
17
+ from ...models.responses import AgentResponse
18
+
19
+
20
+ class ScrapingOrchestrator:
21
+ """
22
+ High-level orchestrator that manages the complete LLM-directed scraping workflow.
23
+
24
+ This class integrates with AI-parrot's existing infrastructure:
25
+ - Uses the knowledge base system for storing scraped content
26
+ - Integrates with the loader system for content processing
27
+ - Supports agent orchestration patterns
28
+ - Provides hooks for custom post-processing
29
+ """
30
+
31
def __init__(
    self,
    agent_name: str = "WebScrapingAgent",
    driver_type: str = 'selenium',
    knowledge_base: Optional[KnowledgeBaseStore] = None,
    **kwargs
):
    """
    Build the orchestrator together with its underlying scraping agent.

    Args:
        agent_name: Name passed to the ScrapingAgent.
        driver_type: Driver identifier passed to the ScrapingAgent.
        knowledge_base: Optional store consulted when persisting mission results.
        **kwargs: Forwarded unchanged to ScrapingAgent. The keys
            ``auto_store_results`` (default True), ``max_concurrent_scrapes``
            (default 3), ``retry_failed_scrapes`` (default True) and
            ``respect_robots_txt`` (default True) are additionally read here.
    """
    option = kwargs.get

    self.logger = logging.getLogger("AI-Parrot.ScrapingOrchestrator")

    # NOTE(review): the agent receives the full kwargs, including the
    # orchestrator-level options read below - confirm ScrapingAgent tolerates them.
    self.scraping_agent = ScrapingAgent(name=agent_name, driver_type=driver_type, **kwargs)

    # Persistence settings
    self.knowledge_base = knowledge_base
    self.auto_store_results = option('auto_store_results', True)

    # Hooks applied to every batch of results (both start out empty)
    self.post_processors: List[Callable] = []
    self.result_filters: List[Callable] = []

    # Behavioural switches
    self.max_concurrent_scrapes = option('max_concurrent_scrapes', 3)
    self.retry_failed_scrapes = option('retry_failed_scrapes', True)
    self.respect_robots_txt = option('respect_robots_txt', True)

    # Running counters for this orchestrator instance
    counters = {
        name: 0
        for name in ('total_requests', 'successful_scrapes', 'failed_scrapes', 'pages_processed')
    }
    counters['start_time'] = datetime.now()
    self.session_stats = counters
68
+
69
async def execute_scraping_mission(
    self,
    mission_config: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Execute a complete scraping mission with multiple targets and objectives.

    Every target is handed to ``_process_single_target``; the shared
    semaphore is sized by ``self.max_concurrent_scrapes``. A failing target
    never aborts the mission: its exception is recorded under ``errors``.

    Args:
        mission_config: Configuration dictionary containing:
            - targets: List of URLs or site configurations
            - objectives: What to extract from each target
            - authentication: Login credentials if needed
            - output_config: How to store/process results
            - constraints: Rate limiting, ethics, etc.
            An optional ``mission_id`` overrides the generated identifier.

    Returns:
        Dictionary with the keys ``mission_id``, ``start_time``, ``end_time``,
        ``duration`` (seconds), ``targets``, ``statistics`` and ``errors``.
    """
    # `or []` also covers an explicit ``"targets": None`` in the config.
    targets = mission_config.get('targets') or []
    self.logger.info(f"Starting scraping mission with {len(targets)} targets")

    started_at = datetime.now()
    mission_results = {
        'mission_id': mission_config.get('mission_id', f"mission_{started_at.strftime('%Y%m%d_%H%M%S')}"),
        'start_time': started_at.isoformat(),
        'targets': [],
        'statistics': {},
        'errors': []
    }

    try:
        # Process targets concurrently with semaphore control
        semaphore = asyncio.Semaphore(self.max_concurrent_scrapes)
        tasks = [
            self._process_single_target(semaphore, target, mission_config, index)
            for index, target in enumerate(targets)
        ]

        # return_exceptions=True keeps one failing target from cancelling the rest
        target_results = await asyncio.gather(*tasks, return_exceptions=True)

        for index, result in enumerate(target_results):
            # BaseException, not Exception: asyncio.CancelledError derives from
            # BaseException and would otherwise be stored as a successful target.
            if isinstance(result, BaseException):
                reason = str(result) or type(result).__name__
                self.logger.error(f"Target {index} failed: {reason}")
                mission_results['errors'].append({
                    'target_index': index,
                    'error': reason,
                    'target_config': targets[index]
                })
            else:
                mission_results['targets'].append(result)

        # Calculate statistics
        mission_results['statistics'] = self._calculate_mission_statistics(mission_results)

        # Store results if configured
        if self.auto_store_results and self.knowledge_base:
            await self._store_mission_results(mission_results)

    except Exception as e:
        self.logger.error(f"Mission execution failed: {str(e)}")
        mission_results['errors'].append({
            'type': 'mission_failure',
            'error': str(e)
        })

    finally:
        # Timing is recorded even when the mission failed part-way through.
        finished_at = datetime.now()
        mission_results['end_time'] = finished_at.isoformat()
        mission_results['duration'] = (finished_at - started_at).total_seconds()

    return mission_results
145
+
146
+ async def _process_single_target(
147
+ self,
148
+ semaphore: asyncio.Semaphore,
149
+ target_config: Dict[str, Any],
150
+ mission_config: Dict[str, Any],
151
+ target_index: int
152
+ ) -> Dict[str, Any]:
153
+ """Process a single scraping target with concurrency control"""
154
+ async with semaphore:
155
+ self.session_stats['total_requests'] += 1
156
+
157
+ # Build complete request for this target
158
+ request = {
159
+ 'target_url': target_config.get('url') or target_config.get('target_url'),
160
+ 'objective': target_config.get('objective') or mission_config.get('default_objective'),
161
+ 'authentication': target_config.get('authentication') or mission_config.get('authentication'),
162
+ 'constraints': mission_config.get('constraints', {}),
163
+ 'base_url': target_config.get('base_url', ''),
164
+ 'custom_selectors': target_config.get('selectors', []),
165
+ 'custom_steps': target_config.get('steps', [])
166
+ }
167
+
168
+ # Check if we have prior knowledge about this site
169
+ recommendations = await self.scraping_agent.get_site_recommendations(request['target_url'])
170
+
171
+ # Execute the intelligent scraping
172
+ scraping_results = await self.scraping_agent.execute_intelligent_scraping(request)
173
+
174
+ # Process results through filters and post-processors
175
+ processed_results = await self._process_results(scraping_results, target_config)
176
+
177
+ # Update statistics
178
+ if processed_results:
179
+ successful_results = [r for r in processed_results if r.success]
180
+ self.session_stats['successful_scrapes'] += len(successful_results)
181
+ self.session_stats['failed_scrapes'] += len(processed_results) - len(successful_results)
182
+ self.session_stats['pages_processed'] += len(processed_results)
183
+
184
+ return {
185
+ 'target_index': target_index,
186
+ 'target_config': target_config,
187
+ 'request': request,
188
+ 'recommendations': recommendations,
189
+ 'scraping_results': [
190
+ {
191
+ 'url': r.url,
192
+ 'success': r.success,
193
+ 'extracted_data': r.extracted_data,
194
+ 'metadata': r.metadata,
195
+ 'error_message': r.error_message
196
+ } for r in processed_results
197
+ ],
198
+ 'processed_at': datetime.now().isoformat()
199
+ }
200
+
201
+ async def _process_results(
202
+ self,
203
+ results: List[ScrapingResult],
204
+ target_config: Dict[str, Any]
205
+ ) -> List[ScrapingResult]:
206
+ """Apply filters and post-processors to results"""
207
+ processed_results = results.copy()
208
+
209
+ # Apply result filters
210
+ for result_filter in self.result_filters:
211
+ processed_results = [r for r in processed_results if result_filter(r, target_config)]
212
+
213
+ # Apply post-processors
214
+ for post_processor in self.post_processors:
215
+ processed_results = await post_processor(processed_results, target_config)
216
+
217
+ return processed_results
218
+
219
def add_result_filter(self, filter_func: Callable[[ScrapingResult, Dict[str, Any]], bool]):
    """Register a predicate that decides which results are kept.

    The predicate is later called as ``filter_func(result, target_config)``;
    results for which it returns a falsy value are dropped. Filters are
    applied in the order they were registered.
    """
    registered_filters = self.result_filters
    registered_filters.append(filter_func)
222
+
223
def add_post_processor(self, processor_func: Callable):
    """Register a post-processor that enhances results after filtering.

    The processor is later awaited as
    ``processor_func(results, target_config)`` and must return the list of
    results to pass on. Processors run in the order they were registered.
    """
    registered_processors = self.post_processors
    registered_processors.append(processor_func)
226
+
227
+ async def _store_mission_results(self, mission_results: Dict[str, Any]):
228
+ """Store scraping results in the knowledge base"""
229
+ if not self.knowledge_base:
230
+ return
231
+
232
+ try:
233
+ for target_result in mission_results['targets']:
234
+ for scraping_result in target_result['scraping_results']:
235
+ if scraping_result['success'] and scraping_result['extracted_data']:
236
+ # Prepare document for knowledge base
237
+ document = {
238
+ 'content': json.dumps(scraping_result['extracted_data'], indent=2),
239
+ 'metadata': {
240
+ 'source_url': scraping_result['url'],
241
+ 'scraping_mission_id': mission_results['mission_id'],
242
+ 'scraped_at': scraping_result['metadata'].get('timestamp'),
243
+ 'content_type': 'scraped_data',
244
+ 'target_objective': target_result['request']['objective']
245
+ }
246
+ }
247
+
248
+ # Store in knowledge base
249
+ await self.knowledge_base.add_document(document)
250
+
251
+ self.logger.info(f"Stored mission results in knowledge base: {mission_results['mission_id']}")
252
+
253
+ except Exception as e:
254
+ self.logger.error(f"Failed to store mission results: {str(e)}")
255
+
256
+ def _calculate_mission_statistics(self, mission_results: Dict[str, Any]) -> Dict[str, Any]:
257
+ """Calculate comprehensive statistics for the mission"""
258
+ total_targets = len(mission_results['targets'])
259
+ total_scrapes = sum(len(t['scraping_results']) for t in mission_results['targets'])
260
+ successful_scrapes = sum(
261
+ len([r for r in t['scraping_results'] if r['success']])
262
+ for t in mission_results['targets']
263
+ )
264
+
265
+ return {
266
+ 'total_targets': total_targets,
267
+ 'total_scrapes': total_scrapes,
268
+ 'successful_scrapes': successful_scrapes,
269
+ 'success_rate': successful_scrapes / total_scrapes if total_scrapes > 0 else 0,
270
+ 'targets_with_data': len([t for t in mission_results['targets']
271
+ if any(r['extracted_data'] for r in t['scraping_results'])]),
272
+ 'average_pages_per_target': total_scrapes / total_targets if total_targets > 0 else 0,
273
+ 'session_stats': self.session_stats.copy()
274
+ }
275
+
276
+
277
+ # Example usage and integration patterns
278
class ScrapingMissionBuilder:
    """Fluent builder that assembles a scraping mission configuration dict.

    Every configuration method returns the builder itself so calls can be
    chained; ``build()`` stamps a mission id and returns the configuration.
    """

    def __init__(self):
        # Working configuration; 'authentication' and 'mission_id' are added
        # on demand by set_authentication() and build().
        self.mission_config = {
            'targets': [],
            'constraints': {},
            'output_config': {}
        }

    def add_target(
        self,
        url: str,
        objective: str = "Extract all relevant content",
        authentication: Optional[Dict[str, Any]] = None,
        custom_steps: Optional[List[Dict[str, Any]]] = None,
        custom_selectors: Optional[List[Dict[str, Any]]] = None
    ) -> 'ScrapingMissionBuilder':
        """Append a target to the mission.

        Args:
            url: Address of the page to scrape.
            objective: Natural-language description of what to extract.
            authentication: Optional per-target authentication settings.
            custom_steps: Optional steps, stored under the ``steps`` key.
            custom_selectors: Optional selectors, stored under ``selectors``.

        Returns:
            The builder, for chaining.
        """
        target = {
            'url': url,
            'objective': objective
        }

        # Optional keys are only written when a non-empty value was supplied.
        if authentication:
            target['authentication'] = authentication
        if custom_steps:
            target['steps'] = custom_steps
        if custom_selectors:
            target['selectors'] = custom_selectors

        self.mission_config['targets'].append(target)
        return self

    def set_rate_limiting(
        self,
        requests_per_minute: int = 30,
        delay_between_requests: float = 2.0
    ) -> 'ScrapingMissionBuilder':
        """Record rate-limiting constraints in the mission's ``constraints``."""
        self.mission_config['constraints'].update({
            'requests_per_minute': requests_per_minute,
            'delay_between_requests': delay_between_requests
        })
        return self

    def set_authentication(
        self,
        username: str,
        password: str,
        login_url: str,
        username_selector: str = "#username",
        password_selector: str = "#password",
        submit_selector: str = "input[type=submit]"
    ) -> 'ScrapingMissionBuilder':
        """Set mission-level authentication under the top-level ``authentication`` key.

        The selectors identify the login form's username field, password
        field and submit control.
        """
        self.mission_config['authentication'] = {
            'required': True,
            'username': username,
            'password': password,
            'login_url': login_url,
            'selectors': {
                'username': username_selector,
                'password': password_selector,
                'submit': submit_selector
            }
        }
        return self

    def enable_content_analysis(
        self,
        summarize_content: bool = True,
        extract_entities: bool = True,
        sentiment_analysis: bool = False
    ) -> 'ScrapingMissionBuilder':
        """Record content-analysis flags in the mission's ``output_config``."""
        self.mission_config['output_config'].update({
            'summarize_content': summarize_content,
            'extract_entities': extract_entities,
            'sentiment_analysis': sentiment_analysis
        })
        return self

    def build(self) -> Dict[str, Any]:
        """Stamp a mission id and return an independent copy of the configuration.

        The id has one-second resolution (``mission_YYYYmmdd_HHMMSS``). The
        returned dict is a deep copy: a shallow copy would share the
        ``targets`` list and the nested dicts with the builder, so reusing the
        builder afterwards would silently alter a mission that was already
        built.
        """
        import copy

        self.mission_config['mission_id'] = f"mission_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        return copy.deepcopy(self.mission_config)
365
+
366
+
367
+ # Example usage scenarios
368
async def example_ecommerce_scraping():
    """Example: scrape laptop listings from two stores and normalize their prices.

    Builds a two-target mission (one with explicit selectors), runs it on a
    headless Selenium-backed orchestrator, and attaches a post-processor that
    adds a numeric ``normalized_price`` to each result's metadata.
    """
    # Selectors for the primary store's product page.
    laptop_selectors = [
        {
            "name": "product_name",
            "selector": "h1.product-title",
            "extract_type": "text"
        },
        {
            "name": "price",
            "selector": ".price-current",
            "extract_type": "text"
        },
        {
            "name": "specifications",
            "selector": ".product-specs li",
            "extract_type": "text",
            "multiple": True
        }
    ]

    # Assemble the mission step by step with the builder.
    builder = ScrapingMissionBuilder()
    builder.add_target(
        url="https://example-store.com/products/laptops",
        objective="Extract laptop product details including name, price, specifications, and reviews",
        custom_selectors=laptop_selectors
    )
    builder.add_target(
        url="https://competitor-store.com/laptops",
        objective="Extract competing laptop prices for comparison"
    )
    builder.set_rate_limiting(requests_per_minute=20, delay_between_requests=3.0)
    builder.enable_content_analysis(summarize_content=True, extract_entities=True)
    mission = builder.build()

    orchestrator = ScrapingOrchestrator(driver_type='selenium', headless=True)

    async def price_comparison_processor(results, target_config):
        """Attach a numeric ``normalized_price`` to every result exposing a price."""
        for scraped in results:
            data = scraped.extracted_data
            if 'price' in data:
                # Price normalization hook: convert the scraped text to a number.
                scraped.metadata['normalized_price'] = extract_price_number(data['price'])
        return results

    orchestrator.add_post_processor(price_comparison_processor)

    # Run the mission and hand the raw mission results back to the caller.
    return await orchestrator.execute_scraping_mission(mission)
425
+
426
async def example_news_monitoring():
    """Example: monitor a technology news page and keep only AI/ML articles.

    Builds a single-target mission with headline and summary selectors, runs
    it on a default orchestrator, and registers a filter that discards
    results whose extracted data mentions none of the AI/ML keywords.
    """
    article_selectors = [
        {
            "name": "headlines",
            "selector": "h2.article-title a",
            "extract_type": "text",
            "multiple": True
        },
        {
            "name": "summaries",
            "selector": ".article-summary",
            "extract_type": "text",
            "multiple": True
        }
    ]

    builder = ScrapingMissionBuilder()
    builder.add_target(
        url="https://news-site.com/technology",
        objective="Extract technology news articles with headlines, summaries, and publication dates",
        custom_selectors=article_selectors
    )
    builder.set_rate_limiting(requests_per_minute=15)
    builder.enable_content_analysis(
        summarize_content=True,
        extract_entities=True,
        sentiment_analysis=True
    )
    mission = builder.build()

    orchestrator = ScrapingOrchestrator()

    def ai_ml_filter(result: ScrapingResult, target_config: Dict[str, Any]) -> bool:
        """Keep only successful results whose data mentions an AI/ML keyword."""
        if not (result.success and result.extracted_data):
            return False

        # Case-insensitive substring match over the stringified data.
        haystack = str(result.extracted_data).lower()
        for keyword in ('artificial intelligence', 'machine learning', 'deep learning', 'neural network'):
            if keyword in haystack:
                return True
        return False

    orchestrator.add_result_filter(ai_ml_filter)

    return await orchestrator.execute_scraping_mission(mission)
472
+
473
def extract_price_number(price_text: str) -> Optional[float]:
    """Extract the first numeric price from scraped text.

    Commas are treated as thousands separators and removed before matching,
    so ``"$1,299.99"`` yields ``1299.99``. A price written without a leading
    zero (``"$.99"``) yields ``0.99``. Values that are already numeric are
    returned as ``float``.

    Args:
        price_text: Scraped price text; ``None`` and other non-text values
            are tolerated because a scraped field may be missing.

    Returns:
        The parsed price, or ``None`` when no number can be found.
    """
    import re

    # bool is an int subclass; a flag is never a price.
    if isinstance(price_text, bool):
        return None
    if isinstance(price_text, (int, float)):
        return float(price_text)
    if not isinstance(price_text, str):
        return None

    without_separators = price_text.replace(',', '')
    # Second alternative accepts ".99"-style prices, but only when the dot
    # does not trail a word or another dot (so "no.5" still parses as 5).
    price_match = re.search(r'\d+\.?\d*|(?<![\w.])\.\d+', without_separators)
    return float(price_match.group()) if price_match else None
478
+
479
+
480
+ # Integration with existing AI-parrot infrastructure
481
async def integrate_with_knowledge_base(kb_store: KnowledgeBaseStore):
    """Example: build an orchestrator wired to an AI-parrot knowledge base.

    The orchestrator is created with ``auto_store_results=True`` and gets a
    post-processor that runs each successful result through ``TextLoader``
    and records the resulting chunk count in the result's metadata.

    Returns:
        The configured orchestrator; no mission is executed here.
    """
    orchestrator = ScrapingOrchestrator(knowledge_base=kb_store, auto_store_results=True)

    async def knowledge_base_processor(results, target_config):
        """Chunk each successful result's data and annotate its metadata."""
        from ..loaders.text import TextLoader

        for scraped in results:
            if not (scraped.success and scraped.extracted_data):
                continue

            # The loader is fed the extracted data serialized as indented JSON.
            serialized = json.dumps(scraped.extracted_data, indent=2)
            loader = TextLoader(source=serialized, chunk_size=800, chunk_overlap=100)
            chunks = await loader.process_documents()

            # Only bookkeeping is stored on the result, not the chunks themselves.
            scraped.metadata['processed_chunks'] = len(chunks)
            scraped.metadata['content_processed'] = True

        return results

    orchestrator.add_post_processor(knowledge_base_processor)
    return orchestrator