ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,740 @@
1
+ # ScrapingAgent for AI-Parrot
2
+
3
+ An intelligent agent that lets you control web scraping operations in natural language, backed by LLM-powered planning and execution.
4
+
5
+ ## Overview
6
+
7
+ The ScrapingAgent combines the power of large language models with browser automation to create a natural language interface for web scraping. It analyzes web pages, generates optimal scraping strategies, and executes complex scraping workflows with minimal manual configuration.
8
+
9
+ ### Key Features
10
+
11
+ - **Natural Language Control**: Describe what you want to scrape in plain English
12
+ - **Intelligent Analysis**: Automatically analyzes page structure and complexity
13
+ - **Strategic Planning**: Generates step-by-step navigation and extraction plans
14
+ - **Structured Output**: Uses Pydantic models for validation and type safety
15
+ - **Multiple Browser Support**: Works with both Selenium and Playwright drivers, in regular and undetected modes
16
+ - **Mobile Emulation**: Scrape mobile versions of websites
17
+ - **Authentication Handling**: Built-in support for login workflows
18
+ - **Plan Refinement**: Iteratively improve plans based on execution results
19
+ - **RESTful API**: Full HTTP API for integration with other services
20
+
21
+ ## Architecture
22
+
23
+ ```
24
+ ┌─────────────────────────────────────────────────────┐
25
+ │ ScrapingAgent │
26
+ │ (Inherits from BasicAgent → AbstractBot) │
27
+ ├─────────────────────────────────────────────────────┤
28
+ │ │
29
+ │ ┌──────────────┐ ┌────────────────┐ │
30
+ │ │ Analysis │ │ Plan Generation│ │
31
+ │ │ Module │ │ & Validation │ │
32
+ │ └──────┬───────┘ └────────┬────────┘ │
33
+ │ │ │ │
34
+ │ └────────┬──────────┘ │
35
+ │ │ │
36
+ │ ┌────────▼────────┐ │
37
+ │ │ Execution │ │
38
+ │ │ Orchestrator │ │
39
+ │ └────────┬────────┘ │
40
+ │ │ │
41
+ │ ┌────────▼────────┐ │
42
+ │ │ WebScrapingTool │ │
43
+ │ └────────┬────────┘ │
44
+ │ │ │
45
+ │ ┌─────────────┴─────────────┐ │
46
+ │ │ │ │
47
+ │ ┌──▼────────┐ ┌───────▼────┐ │
48
+ │ │ Selenium │ │ Playwright │ │
49
+ │ │ Driver │ │ Driver │ │
50
+ │ └───────────┘ └────────────┘ │
51
+ └─────────────────────────────────────────────────────┘
52
+ ```
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ # Install AI-Parrot with scraping dependencies
58
+ pip install "ai-parrot[scraping]"
59
+
60
+ # Or install individual dependencies
61
+ pip install selenium playwright undetected-chromedriver
62
+ pip install beautifulsoup4 lxml
63
+
64
+ # Install playwright browsers
65
+ playwright install
66
+ ```
67
+
68
+ ## Quick Start
69
+
70
+ ### Basic Usage
71
+
72
+ ```python
73
+ import asyncio
74
+ from parrot.agents.scraping_agent import ScrapingAgent
75
+
76
+ async def main():
77
+ # Create agent
78
+ agent = ScrapingAgent(
79
+ name="MyScraper",
80
+ llm="openai",
81
+ model="gpt-4"
82
+ )
83
+
84
+ # Configure agent
85
+ await agent.configure()
86
+
87
+ # Scrape with natural language
88
+ result = await agent.scrape(
89
+ "Extract all article titles and authors from https://news.ycombinator.com"
90
+ )
91
+
92
+ print(f"Status: {result['status']}")
93
+ print(f"Pages scraped: {result['metadata']['total_pages_scraped']}")
94
+
95
+ # Access extracted data
96
+ for page_result in result['result']:
97
+ if page_result['success']:
98
+ print(f"\nURL: {page_result['url']}")
99
+ print(f"Data: {page_result['extracted_data']}")
100
+
101
+ asyncio.run(main())
102
+ ```
103
+
104
+ ### Advanced Usage with Plan Control
105
+
106
+ ```python
107
+ async def advanced_scraping():
108
+ agent = ScrapingAgent(
109
+ name="AdvancedScraper",
110
+ llm="anthropic",
111
+ model="claude-sonnet-4"
112
+ )
113
+ await agent.configure()
114
+
115
+ # Step 1: Generate plan
116
+ plan = await agent.generate_scraping_plan(
117
+ objective="Search for Python jobs and extract job titles, companies, and locations",
118
+ url="https://jobs.example.com",
119
+ context={
120
+ "search_query": "Python Developer",
121
+ "location": "Remote"
122
+ }
123
+ )
124
+
125
+ # Step 2: Review and modify plan if needed
126
+ print(f"Generated {len(plan.steps)} steps")
127
+ print(f"Using {len(plan.selectors)} selectors")
128
+
129
+ # Optionally modify the plan
130
+ plan.browser_config.headless = False # Show browser
131
+
132
+ # Step 3: Execute the plan
133
+ result = await agent.execute_plan(plan)
134
+
135
+ # Step 4: Refine if needed
136
+ if not result['status']:
137
+ refined_plan = await agent.refine_plan(
138
+ plan,
139
+ feedback="The search button selector was incorrect. Try '#search-btn' instead."
140
+ )
141
+ result = await agent.execute_plan(refined_plan)
142
+
143
+ return result
144
+ ```
145
+
146
+ ## Structured Output Schemas
147
+
148
+ ### ScrapingPlanSchema
149
+
150
+ The complete plan for a scraping operation:
151
+
152
+ ```python
153
+ from parrot.agents.scraping_agent import (
154
+ ScrapingPlanSchema,
155
+ BrowserConfigSchema,
156
+ NavigationStepSchema,
157
+ SelectorSchema,
158
+ PageAnalysisSchema
159
+ )
160
+
161
+ # Create a manual plan
162
+ plan = ScrapingPlanSchema(
163
+ objective="Extract product information",
164
+ analysis=PageAnalysisSchema(
165
+ url="https://shop.example.com",
166
+ page_type="product listing",
167
+ complexity="moderate",
168
+ requires_javascript=True,
169
+ has_pagination=True,
170
+ has_authentication=False,
171
+ key_elements=["product cards", "prices"],
172
+ potential_challenges=["lazy loading"],
173
+ recommended_approach="Use browser with scroll"
174
+ ),
175
+ browser_config=BrowserConfigSchema(
176
+ browser="chrome",
177
+ headless=True,
178
+ mobile=False
179
+ ),
180
+ steps=[
181
+ NavigationStepSchema(
182
+ action="navigate",
183
+ description="Go to products page",
184
+ target="https://shop.example.com/products"
185
+ ),
186
+ NavigationStepSchema(
187
+ action="wait",
188
+ description="Wait for products",
189
+ target=".product-card",
190
+ timeout=10
191
+ )
192
+ ],
193
+ selectors=[
194
+ SelectorSchema(
195
+ name="titles",
196
+ selector=".product-title",
197
+ extract_type="text",
198
+ multiple=True
199
+ )
200
+ ]
201
+ )
202
+ ```
203
+
204
+ ### BrowserConfigSchema
205
+
206
+ Browser configuration options:
207
+
208
+ ```python
209
+ config = BrowserConfigSchema(
210
+ browser="chrome", # or "firefox", "edge", "safari", "undetected"
211
+ headless=True, # Run without UI
212
+ mobile=False, # Emulate mobile device
213
+ mobile_device="iPhone 12", # Specific device to emulate
214
+ driver_type="selenium", # or "playwright"
215
+ auto_install=True # Auto-install drivers
216
+ )
217
+ ```
218
+
219
+ ### NavigationStepSchema
220
+
221
+ Individual scraping steps:
222
+
223
+ ```python
224
+ # Navigate to URL
225
+ step1 = NavigationStepSchema(
226
+ action="navigate",
227
+ description="Go to homepage",
228
+ target="https://example.com"
229
+ )
230
+
231
+ # Click element
232
+ step2 = NavigationStepSchema(
233
+ action="click",
234
+ description="Click search button",
235
+ target="#search-btn",
236
+ wait_after=2.0
237
+ )
238
+
239
+ # Fill form
240
+ step3 = NavigationStepSchema(
241
+ action="fill",
242
+ description="Enter search query",
243
+ target="input[name='q']",
244
+ value="web scraping"
245
+ )
246
+
247
+ # Wait for element
248
+ step4 = NavigationStepSchema(
249
+ action="wait",
250
+ description="Wait for results",
251
+ target=".search-result",
252
+ timeout=10
253
+ )
254
+
255
+ # Scroll
256
+ step5 = NavigationStepSchema(
257
+ action="scroll",
258
+ description="Scroll to bottom",
259
+ target="bottom"
260
+ )
261
+ ```
262
+
263
+ ### SelectorSchema
264
+
265
+ Content extraction selectors:
266
+
267
+ ```python
268
+ # Extract text
269
+ selector1 = SelectorSchema(
270
+ name="product_titles",
271
+ selector=".product h2",
272
+ selector_type="css",
273
+ extract_type="text",
274
+ multiple=True
275
+ )
276
+
277
+ # Extract attribute
278
+ selector2 = SelectorSchema(
279
+ name="product_images",
280
+ selector=".product img",
281
+ selector_type="css",
282
+ extract_type="attribute",
283
+ attribute="src",
284
+ multiple=True
285
+ )
286
+
287
+ # Extract HTML
288
+ selector3 = SelectorSchema(
289
+ name="product_descriptions",
290
+ selector=".description",
291
+ extract_type="html",
292
+ multiple=False
293
+ )
294
+ ```
295
+
296
+ ## Integration Patterns
297
+
298
+ ### With BotManager
299
+
300
+ ```python
301
+ from parrot.manager import BotManager
302
+ from parrot.agents.scraping_agent import ScrapingAgent
303
+
304
+ async def with_manager():
305
+ manager = BotManager()
306
+
307
+ # Create through manager
308
+ agent = await manager.create_agent(
309
+ class_name=ScrapingAgent,
310
+ name="ManagedScraper",
311
+ llm={"name": "openai", "model": "gpt-4"}
312
+ )
313
+
314
+ # Use the agent
315
+ result = await agent.scrape(
316
+ "Extract news headlines from BBC"
317
+ )
318
+
319
+ return result
320
+ ```
321
+
322
+ ### With Agent Registry
323
+
324
+ ```python
325
+ from parrot.registry import agent_registry
326
+ from parrot.agents.scraping_agent import ScrapingAgent
327
+
328
+ # Register at startup
329
+ @agent_registry.register_agent(
330
+ name="ScrapingAgent",
331
+ singleton=True,
332
+ at_startup=True,
333
+ startup_config={
334
+ "llm": "anthropic",
335
+ "model": "claude-sonnet-4"
336
+ },
337
+ tags={"scraping", "automation"},
338
+ priority=100
339
+ )
340
+ class MyScrapingAgent(ScrapingAgent):
341
+ pass
342
+
343
+ # Later, get the agent
344
+ agent = await agent_registry.get_instance("ScrapingAgent")
345
+ ```
346
+
347
+ ### RESTful API
348
+
349
+ ```python
350
+ from aiohttp import web
351
+ from parrot.handlers.scraping_agent_handler import create_scraping_api
352
+
353
+ async def run_api():
354
+ app = await create_scraping_api(
355
+ llm="openai",
356
+ model="gpt-4"
357
+ )
358
+ web.run_app(app, host="0.0.0.0", port=8080)
359
+
360
+ # API Endpoints:
361
+ # POST /api/v1/scraping/analyze - Analyze page
362
+ # POST /api/v1/scraping/plan - Generate plan
363
+ # POST /api/v1/scraping/execute - Execute plan
364
+ # POST /api/v1/scraping/scrape - Complete workflow
365
+ # GET /api/v1/scraping/plans/{id} - Get plan
366
+ # POST /api/v1/scraping/plans/{id}/refine - Refine plan
367
+ # GET /api/v1/scraping/health - Health check
368
+ ```
369
+
370
+ Example API request:
371
+
372
+ ```bash
373
+ curl -X POST http://localhost:8080/api/v1/scraping/scrape \
374
+ -H "Content-Type: application/json" \
375
+ -d '{
376
+ "objective": "Extract product names and prices",
377
+ "url": "https://shop.example.com",
378
+ "return_plan": true
379
+ }'
380
+ ```
381
+
382
+ ## Common Use Cases
383
+
384
+ ### 1. E-commerce Scraping
385
+
386
+ ```python
387
+ result = await agent.scrape("""
388
+ Go to https://shop.example.com/laptops
389
+ Extract for each product:
390
+ - Product name
391
+ - Price
392
+ - Rating
393
+ - Availability
394
+ - Image URL
395
+ Handle pagination to get all products
396
+ """)
397
+ ```
398
+
399
+ ### 2. News Aggregation
400
+
401
+ ```python
402
+ result = await agent.scrape("""
403
+ From https://news.example.com:
404
+ 1. Get all article headlines
405
+ 2. For each article, extract:
406
+ - Title
407
+ - Author
408
+ - Publication date
409
+ - Summary
410
+ - Category tags
411
+ 3. Handle "Load More" button
412
+ """)
413
+ ```
414
+
415
+ ### 3. Job Board Scraping
416
+
417
+ ```python
418
+ result = await agent.scrape(
419
+ objective="""
420
+ Search for 'Python Developer' jobs
421
+ Extract: job title, company, location, salary range
422
+ Apply filters: Remote only, Full-time
423
+ Get results from all pages
424
+ """,
425
+ url="https://jobs.example.com",
426
+ context={
427
+ "requires_search": True,
428
+ "has_filters": True
429
+ }
430
+ )
431
+ ```
432
+
433
+ ### 4. Social Media Scraping
434
+
435
+ ```python
436
+ # Requires authentication
437
+ result = await agent.scrape(
438
+ objective="Extract my last 10 posts with engagement metrics",
439
+ url="https://social.example.com/profile",
440
+ context={
441
+ "requires_login": True,
442
+ "credentials": {
443
+ "username": "user@example.com",
444
+ "password": os.getenv("PASSWORD")
445
+ }
446
+ }
447
+ )
448
+ ```
449
+
450
+ ### 5. Real Estate Listings
451
+
452
+ ```python
453
+ result = await agent.scrape("""
454
+ From https://realestate.example.com:
455
+ Search for: Apartments in San Francisco, $2000-$3000
456
+ Extract:
457
+ - Address
458
+ - Price
459
+ - Bedrooms/Bathrooms
460
+ - Square footage
461
+ - Photos (URLs)
462
+ - Contact information
463
+ Navigate through all result pages
464
+ """)
465
+ ```
466
+
467
+ ## Advanced Features
468
+
469
+ ### Mobile Scraping
470
+
471
+ ```python
472
+ # Scrape mobile version
473
+ plan = await agent.generate_scraping_plan(
474
+ objective="Extract mobile app features",
475
+ url="https://app-store.example.com"
476
+ )
477
+
478
+ # Enable mobile mode
479
+ plan.browser_config.mobile = True
480
+ plan.browser_config.mobile_device = "iPhone 12"
481
+
482
+ result = await agent.execute_plan(plan)
483
+ ```
484
+
485
+ ### Anti-Bot Bypass
486
+
487
+ ```python
488
+ # Use undetected browser for sites with Cloudflare
489
+ plan.browser_config.browser = "undetected"
490
+ plan.browser_config.headless = False # Often required
491
+ ```
492
+
493
+ ### Authentication
494
+
495
+ ```python
496
+ result = await agent.scrape(
497
+ objective="Extract dashboard data after login",
498
+ url="https://app.example.com",
499
+ context={
500
+ "requires_login": True,
501
+ "login_url": "https://app.example.com/login",
502
+ "credentials": {
503
+ "username": "user@example.com",
504
+ "password": os.getenv("PASSWORD")
505
+ },
506
+ "username_selector": "#email",
507
+ "password_selector": "#password",
508
+ "submit_selector": "button[type='submit']"
509
+ }
510
+ )
511
+ ```
512
+
513
+ ### Pagination Handling
514
+
515
+ ```python
516
+ # Agent automatically detects and handles pagination
517
+ result = await agent.scrape("""
518
+ Extract all products from https://shop.example.com
519
+ Handle pagination - click 'Next' until no more pages
520
+ Extract: name, price, rating for each product
521
+ """)
522
+ ```
523
+
524
+ ### Error Handling and Retry
525
+
526
+ ```python
527
+ plan = await agent.generate_scraping_plan(
528
+ objective="Scrape with retry logic",
529
+ url="https://unstable-site.example.com"
530
+ )
531
+
532
+ # Configure retry behavior
533
+ plan.retry_config = {
534
+ "max_retries": 5,
535
+ "retry_delay": 3,
536
+ "retry_on_failure": True
537
+ }
538
+
539
+ result = await agent.execute_plan(plan)
540
+ ```
541
+
542
+ ## Best Practices
543
+
544
+ ### 1. Be Specific in Objectives
545
+
546
+ ❌ **Bad**: "Get data from the website"
547
+
548
+ ✅ **Good**: "Extract product names, prices, and ratings from all pages of https://shop.example.com/electronics"
549
+
550
+ ### 2. Provide Context
551
+
552
+ ```python
553
+ result = await agent.scrape(
554
+ objective="Extract job listings",
555
+ url="https://jobs.example.com",
556
+ context={
557
+ "page_type": "job board",
558
+ "requires_search": True,
559
+ "search_query": "Python Developer",
560
+ "has_filters": True,
561
+ "pagination_type": "infinite scroll"
562
+ }
563
+ )
564
+ ```
565
+
566
+ ### 3. Review Plans Before Execution
567
+
568
+ ```python
569
+ # Generate plan first
570
+ plan = await agent.generate_scraping_plan(objective, url)
571
+
572
+ # Review
573
+ print(f"Steps: {len(plan.steps)}")
574
+ print(f"Selectors: {len(plan.selectors)}")
575
+ for step in plan.steps:
576
+ print(f"- {step.action}: {step.description}")
577
+
578
+ # Modify if needed
579
+ plan.browser_config.headless = False
580
+
581
+ # Then execute
582
+ result = await agent.execute_plan(plan)
583
+ ```
584
+
585
+ ### 4. Use Appropriate Browser Mode
586
+
587
+ ```python
588
+ # For JavaScript-heavy sites
589
+ config.browser = "chrome"
590
+ config.headless = True
591
+
592
+ # For anti-bot sites
593
+ config.browser = "undetected"
594
+ config.headless = False
595
+
596
+ # For simple static sites
597
+ config.browser = "chrome"
598
+ config.headless = True
599
+ ```
600
+
601
+ ### 5. Handle Rate Limiting
602
+
603
+ ```python
604
+ # Add delays between requests
605
+ for step in plan.steps:
606
+ step.wait_after = 2.0 # Wait 2 seconds after each action
607
+
608
+ # Or in retry config
609
+ plan.retry_config["retry_delay"] = 5 # Wait 5 seconds between retries
610
+ ```
611
+
612
+ ## Troubleshooting
613
+
614
+ ### Issue: Selectors not finding elements
615
+
616
+ **Solution**: Refine the plan with correct selectors
617
+
618
+ ```python
619
+ refined_plan = await agent.refine_plan(
620
+ plan,
621
+ feedback="Selector '.title' not found. The correct selector is '.product-name'"
622
+ )
623
+ ```
624
+
625
+ ### Issue: Page requires JavaScript but not rendering
626
+
627
+ **Solution**: Mark the page as requiring JavaScript and make sure a real browser driver is used
628
+
629
+ ```python
630
+ plan.analysis.requires_javascript = True
631
+ plan.browser_config.driver_type = "selenium" # or "playwright"
632
+ ```
633
+
634
+ ### Issue: Anti-bot detection
635
+
636
+ **Solution**: Use undetected browser mode
637
+
638
+ ```python
639
+ plan.browser_config.browser = "undetected"
640
+ plan.browser_config.headless = False
641
+ ```
642
+
643
+ ### Issue: Slow page loading
644
+
645
+ **Solution**: Increase timeouts
646
+
647
+ ```python
648
+ for step in plan.steps:
649
+ if step.action == "wait":
650
+ step.timeout = 30 # Increase to 30 seconds
651
+ ```
652
+
653
+ ## Performance Considerations
654
+
655
+ ### Parallel Scraping
656
+
657
+ ```python
658
+ import asyncio
659
+
660
+ async def scrape_multiple_urls(urls):
661
+ agent = ScrapingAgent()
662
+ await agent.configure()
663
+
664
+ tasks = [
665
+ agent.scrape(f"Extract data from {url}")
666
+ for url in urls
667
+ ]
668
+
669
+ results = await asyncio.gather(*tasks)
670
+ return results
671
+ ```
672
+
673
+ ### Resource Management
674
+
675
+ ```python
676
+ # Always cleanup
677
+ async def scrape_with_cleanup():
678
+ agent = ScrapingAgent()
679
+ try:
680
+ await agent.configure()
681
+ result = await agent.scrape(objective)
682
+ return result
683
+ finally:
684
+ # Cleanup happens automatically via context manager
685
+ pass
686
+ ```
687
+
688
+ ### Caching Plans
689
+
690
+ ```python
691
+ # Store plans for reuse
692
+ plan = await agent.generate_scraping_plan(objective, url)
693
+
694
+ # Save plan
695
+ with open('scraping_plan.json', 'w') as f:
696
+ f.write(plan.model_dump_json())
697
+
698
+ # Load and reuse later
699
+ with open('scraping_plan.json', 'r') as f:
700
+ plan_data = json.load(f)
701
+ plan = ScrapingPlanSchema(**plan_data)
702
+
703
+ result = await agent.execute_plan(plan)
704
+ ```
705
+
706
+ ## Legal and Ethical Considerations
707
+
708
+ ⚠️ **Important**: Always respect each website's terms of service and its robots.txt.
709
+
710
+ - Check robots.txt before scraping
711
+ - Respect rate limits
712
+ - Don't overload servers
713
+ - Don't scrape copyrighted content without permission
714
+ - Include delays between requests
715
+ - Use appropriate user agents
716
+ - Cache results to minimize requests
717
+
718
+ ## Contributing
719
+
720
+ We welcome contributions! Areas for improvement:
721
+
722
+ - Additional browser support (Safari, Edge)
723
+ - More sophisticated anti-detection techniques
724
+ - Enhanced pagination detection
725
+ - Better error recovery strategies
726
+ - Performance optimizations
727
+
728
+ ## License
729
+
730
+ AI-Parrot and ScrapingAgent are open source under the MIT License.
731
+
732
+ ## Support
733
+
734
+ - Documentation: https://ai-parrot.readthedocs.io
735
+ - Issues: https://github.com/phenobarbital/ai-parrot/issues
736
+ - Discord: https://discord.gg/ai-parrot
737
+
738
+ ---
739
+
740
+ Built with ❤️ by the AI-Parrot team