ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,1278 @@
|
|
|
1
|
+
# WebScrapingTool Documentation
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
WebScrapingTool is an advanced web scraping and browser automation component of AI-Parrot that provides LLM integration support for automated web interactions.
|
|
6
|
+
|
|
7
|
+
### Key Features
|
|
8
|
+
|
|
9
|
+
- **Dual Driver Support**: Works with both Selenium and Playwright
|
|
10
|
+
- **Step-by-step Navigation**: Declarative action-based workflow
|
|
11
|
+
- **Flexible Content Extraction**: Multiple extraction methods
|
|
12
|
+
- **Error Handling**: Built-in retry logic and error management
|
|
13
|
+
- **Mobile Emulation**: Support for mobile device simulation
|
|
14
|
+
- **Authentication**: HTTP and cookie-based authentication
|
|
15
|
+
- **File Operations**: Upload and download monitoring
|
|
16
|
+
|
|
17
|
+
### Driver Support
|
|
18
|
+
|
|
19
|
+
- **Selenium**: Chrome, Firefox, Edge, Safari, Undetected Chrome
|
|
20
|
+
- **Playwright**: Chrome, Firefox, WebKit
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Configuration
|
|
25
|
+
|
|
26
|
+
### Initialization Parameters
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from parrot.tools.scraping import WebScrapingTool
|
|
30
|
+
|
|
31
|
+
tool = WebScrapingTool(
|
|
32
|
+
browser='chrome', # Browser type: 'chrome', 'firefox', 'edge', 'safari', 'undetected'
|
|
33
|
+
driver_type='selenium', # Driver: 'selenium' or 'playwright'
|
|
34
|
+
headless=True, # Run in headless mode
|
|
35
|
+
mobile=False, # Enable mobile emulation
|
|
36
|
+
mobile_device='iPhone 14 Pro Max', # Specific mobile device
|
|
37
|
+
browser_binary=None, # Custom browser path
|
|
38
|
+
driver_binary=None, # Custom driver path
|
|
39
|
+
auto_install=True, # Auto-install driver
|
|
40
|
+
default_timeout=10, # Default timeout (seconds)
|
|
41
|
+
retry_attempts=3, # Retry attempts for actions
|
|
42
|
+
delay_between_actions=1, # Delay between actions (seconds)
|
|
43
|
+
overlay_housekeeping=True # Automatic overlay handling
|
|
44
|
+
)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Designing JSON Action Flows for LLMs
|
|
48
|
+
|
|
49
|
+
WebScrapingTool expects a list of declarative `steps` where each item is a JSON object. When crafting flows (manually or from an
|
|
50
|
+
LLM), keep the following conventions in mind so complex browser sessions remain readable and deterministic:
|
|
51
|
+
|
|
52
|
+
- **Always include a `description`**: Short sentences make it easy for another model or human to reason about intent without
|
|
53
|
+
re-parsing selectors.
|
|
54
|
+
- **Be explicit about selectors**: Pair every `selector` with `selector_type` (`css`, `xpath`, or `text`) to avoid ambiguity
|
|
55
|
+
across engines.
|
|
56
|
+
- **Spell out waits**: Use `condition_type` to signal what the wait is checking—`selector`, `url_is`, `url_contains`, `title_contains`, `custom` (a JavaScript check supplied via `custom_script`), or
|
|
57
|
+
`simple` for a timed pause. Add `timeout` to cap how long the tool should wait.
|
|
58
|
+
- **Structure authentication data**: For `authenticate` actions, choose the `method` (`form`, `basic`, `oauth`, `custom`) and
|
|
59
|
+
include explicit selectors (`username_selector`, `password_selector`, `submit_selector`) so credentials can be filled
|
|
60
|
+
predictably.
|
|
61
|
+
- **Surface timing**: Add `delay_between_actions` at tool init and `timeout`/`duration` per step to keep long flows stable in
|
|
62
|
+
headless environments.
|
|
63
|
+
|
|
64
|
+
### Wait condition types
|
|
65
|
+
|
|
66
|
+
| `condition_type` | What it checks | Example payload |
|
|
67
|
+
| --- | --- | --- |
|
|
68
|
+
| `simple` | Fixed pause using `timeout` | `{ "action": "wait", "condition_type": "simple", "timeout": 2 }` |
|
|
69
|
+
| `selector` | Element presence/visibility | `{ "action": "wait", "condition_type": "selector", "condition": "#loading-done" }` |
|
|
70
|
+
| `url_is` | Exact URL match | `{ "action": "wait", "condition_type": "url_is", "condition": "https://app.example.com/home" }` |
|
|
71
|
+
| `url_contains` | URL substring match | `{ "action": "wait", "condition_type": "url_contains", "condition": "dashboard" }` |
|
|
72
|
+
| `title_contains` | Page title substring | `{ "action": "wait", "condition_type": "title_contains", "condition": "Welcome" }` |
|
|
73
|
+
| `custom` | Custom JS returning truthy | `{ "action": "wait", "condition_type": "custom", "custom_script": "return window.ready === true" }` |
|
|
74
|
+
|
|
75
|
+
### End-to-end JSON flow example
|
|
76
|
+
|
|
77
|
+
The following recipe shows how to combine navigation, form authentication, deterministic waits, and element interactions in a
|
|
78
|
+
single `execute` call:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
steps = [
|
|
82
|
+
{
|
|
83
|
+
"action": "navigate",
|
|
84
|
+
"url": "https://manage.dispatch.me/login",
|
|
85
|
+
"description": "Open Dispatch login page",
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
"action": "authenticate",
|
|
89
|
+
"method": "form",
|
|
90
|
+
"username_selector": "input[name='email']",
|
|
91
|
+
"username": config.get('DISPATCHME_USERNAME'),
|
|
92
|
+
"enter_on_username": True,
|
|
93
|
+
"password_selector": "input[name='password']",
|
|
94
|
+
"password": config.get('DISPATCHME_PASSWORD'),
|
|
95
|
+
"submit_selector": "button[type='submit']",
|
|
96
|
+
"description": "Fill login form and submit",
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"action": "wait",
|
|
100
|
+
"timeout": 5,
|
|
101
|
+
"condition_type": "url_is",
|
|
102
|
+
"condition": "https://manage.dispatch.me/providers/list",
|
|
103
|
+
"description": "Wait for redirect to providers list",
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"action": "navigate",
|
|
107
|
+
"url": "https://manage.dispatch.me/recruit/out-of-network/list",
|
|
108
|
+
"description": "Open recruiters page",
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"action": "click",
|
|
112
|
+
"selector": "//button[contains(., 'Filtering On')]",
|
|
113
|
+
"selector_type": "xpath",
|
|
114
|
+
"description": "Open Filters button",
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"action": "wait",
|
|
118
|
+
"timeout": 2,
|
|
119
|
+
"condition_type": "simple",
|
|
120
|
+
"description": "Let UI settle",
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
"action": "click",
|
|
124
|
+
"selector": "//button[contains(., 'Filters')]",
|
|
125
|
+
"selector_type": "xpath",
|
|
126
|
+
"description": "Toggle filters again",
|
|
127
|
+
},
|
|
128
|
+
]
|
|
129
|
+
result = await scraper.execute(steps=steps)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Available Actions
|
|
135
|
+
|
|
136
|
+
### Navigation Actions
|
|
137
|
+
|
|
138
|
+
#### 1. Navigate
|
|
139
|
+
|
|
140
|
+
Navigate to a specific URL.
|
|
141
|
+
|
|
142
|
+
**Parameters:**
|
|
143
|
+
- `action`: `"navigate"` (required)
|
|
144
|
+
- `url`: Target URL (required)
|
|
145
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
146
|
+
- `description`: Human-readable description (optional)
|
|
147
|
+
|
|
148
|
+
**Example:**
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
{
|
|
152
|
+
"action": "navigate",
|
|
153
|
+
"url": "https://example.com",
|
|
154
|
+
"timeout": 10,
|
|
155
|
+
"description": "Navigate to example homepage"
|
|
156
|
+
}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
#### 2. Back
|
|
160
|
+
|
|
161
|
+
Navigate back in browser history.
|
|
162
|
+
|
|
163
|
+
**Parameters:**
|
|
164
|
+
- `action`: `"back"` (required)
|
|
165
|
+
- `description`: Human-readable description (optional)
|
|
166
|
+
|
|
167
|
+
**Example:**
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
{
|
|
171
|
+
"action": "back",
|
|
172
|
+
"description": "Go back to previous page"
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### 3. Refresh
|
|
177
|
+
|
|
178
|
+
Refresh the current page.
|
|
179
|
+
|
|
180
|
+
**Parameters:**
|
|
181
|
+
- `action`: `"refresh"` (required)
|
|
182
|
+
- `description`: Human-readable description (optional)
|
|
183
|
+
|
|
184
|
+
**Example:**
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
{
|
|
188
|
+
"action": "refresh",
|
|
189
|
+
"description": "Reload the current page"
|
|
190
|
+
}
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
### Interaction Actions
|
|
196
|
+
|
|
197
|
+
#### 4. Click
|
|
198
|
+
|
|
199
|
+
Click on a web page element.
|
|
200
|
+
|
|
201
|
+
**Parameters:**
|
|
202
|
+
- `action`: `"click"` (required)
|
|
203
|
+
- `selector`: CSS selector, XPath expression, or visible text to match, as indicated by `selector_type` (required)
|
|
204
|
+
- `selector_type`: Selector type: `"css"`, `"xpath"`, or `"text"` (default: `"css"`)
|
|
205
|
+
- `click_type`: Click type: `"single"`, `"double"`, or `"right"` (default: `"single"`)
|
|
206
|
+
- `wait_after_click`: CSS selector to wait for after clicking (optional)
|
|
207
|
+
- `wait_timeout`: Timeout for post-click wait in seconds (default: 2)
|
|
208
|
+
- `no_wait`: Skip waiting after click (default: False)
|
|
209
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
210
|
+
- `description`: Human-readable description (optional)
|
|
211
|
+
|
|
212
|
+
**Examples:**
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
# Basic click
|
|
216
|
+
{
|
|
217
|
+
"action": "click",
|
|
218
|
+
"selector": "#submit-button",
|
|
219
|
+
"description": "Click submit button"
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
# Click with XPath
|
|
223
|
+
{
|
|
224
|
+
"action": "click",
|
|
225
|
+
"selector": "//button[@type='submit']",
|
|
226
|
+
"selector_type": "xpath",
|
|
227
|
+
"description": "Click submit using XPath"
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
# Click by text content
|
|
231
|
+
{
|
|
232
|
+
"action": "click",
|
|
233
|
+
"selector": "Sign In",
|
|
234
|
+
"selector_type": "text",
|
|
235
|
+
"description": "Click sign in link"
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
# Double click
|
|
239
|
+
{
|
|
240
|
+
"action": "click",
|
|
241
|
+
"selector": ".item",
|
|
242
|
+
"click_type": "double",
|
|
243
|
+
"description": "Double click item"
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
# Wait for element after clicking
|
|
247
|
+
{
|
|
248
|
+
"action": "click",
|
|
249
|
+
"selector": "#load-more",
|
|
250
|
+
"wait_after_click": ".new-content",
|
|
251
|
+
"wait_timeout": 5,
|
|
252
|
+
"description": "Click load more and wait for content"
|
|
253
|
+
}
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
#### 5. Fill
|
|
257
|
+
|
|
258
|
+
Fill text into an input field.
|
|
259
|
+
|
|
260
|
+
**Parameters:**
|
|
261
|
+
- `action`: `"fill"` (required)
|
|
262
|
+
- `selector`: CSS selector for input field (required)
|
|
263
|
+
- `value`: Text to enter (required)
|
|
264
|
+
- `clear_first`: Clear existing content before filling (default: True)
|
|
265
|
+
- `press_enter`: Press Enter after filling (default: False)
|
|
266
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
267
|
+
- `description`: Human-readable description (optional)
|
|
268
|
+
|
|
269
|
+
**Examples:**
|
|
270
|
+
|
|
271
|
+
```python
|
|
272
|
+
# Basic fill
|
|
273
|
+
{
|
|
274
|
+
"action": "fill",
|
|
275
|
+
"selector": "#username",
|
|
276
|
+
"value": "user@example.com",
|
|
277
|
+
"description": "Enter username"
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
# Fill and submit with Enter
|
|
281
|
+
{
|
|
282
|
+
"action": "fill",
|
|
283
|
+
"selector": "#search-box",
|
|
284
|
+
"value": "Python tutorials",
|
|
285
|
+
"press_enter": True,
|
|
286
|
+
"description": "Search for Python tutorials"
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
# Fill without clearing
|
|
290
|
+
{
|
|
291
|
+
"action": "fill",
|
|
292
|
+
"selector": "#notes",
|
|
293
|
+
"value": " - Additional note",
|
|
294
|
+
"clear_first": False,
|
|
295
|
+
"description": "Append to notes"
|
|
296
|
+
}
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
#### 6. PressKey
|
|
300
|
+
|
|
301
|
+
Press keyboard keys.
|
|
302
|
+
|
|
303
|
+
**Parameters:**
|
|
304
|
+
- `action`: `"press_key"` (required)
|
|
305
|
+
- `key`: Key to press: `"enter"`, `"tab"`, `"escape"`, `"space"`, `"arrow_up"`, `"arrow_down"`, `"arrow_left"`, `"arrow_right"` (required)
|
|
306
|
+
- `selector`: CSS selector to focus before pressing (optional)
|
|
307
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
308
|
+
- `description`: Human-readable description (optional)
|
|
309
|
+
|
|
310
|
+
**Examples:**
|
|
311
|
+
|
|
312
|
+
```python
|
|
313
|
+
# Press Enter
|
|
314
|
+
{
|
|
315
|
+
"action": "press_key",
|
|
316
|
+
"key": "enter",
|
|
317
|
+
"description": "Submit form with Enter"
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
# Press Tab on specific element
|
|
321
|
+
{
|
|
322
|
+
"action": "press_key",
|
|
323
|
+
"key": "tab",
|
|
324
|
+
"selector": "#first-field",
|
|
325
|
+
"description": "Tab to next field"
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
# Press Escape
|
|
329
|
+
{
|
|
330
|
+
"action": "press_key",
|
|
331
|
+
"key": "escape",
|
|
332
|
+
"description": "Close modal"
|
|
333
|
+
}
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
#### 7. Scroll
|
|
337
|
+
|
|
338
|
+
Scroll the page or a specific element.
|
|
339
|
+
|
|
340
|
+
**Parameters:**
|
|
341
|
+
- `action`: `"scroll"` (required)
|
|
342
|
+
- `direction`: Scroll direction: `"up"`, `"down"`, `"top"`, `"bottom"` (default: `"down"`)
|
|
343
|
+
- `amount`: Scroll amount in pixels (default: 500)
|
|
344
|
+
- `selector`: CSS selector of element to scroll (optional, scrolls page if not provided)
|
|
345
|
+
- `smooth`: Use smooth scrolling (default: False)
|
|
346
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
347
|
+
- `description`: Human-readable description (optional)
|
|
348
|
+
|
|
349
|
+
**Examples:**
|
|
350
|
+
|
|
351
|
+
```python
|
|
352
|
+
# Scroll down page
|
|
353
|
+
{
|
|
354
|
+
"action": "scroll",
|
|
355
|
+
"direction": "down",
|
|
356
|
+
"amount": 1000,
|
|
357
|
+
"description": "Scroll down 1000px"
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
# Scroll to bottom
|
|
361
|
+
{
|
|
362
|
+
"action": "scroll",
|
|
363
|
+
"direction": "bottom",
|
|
364
|
+
"description": "Scroll to page bottom"
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
# Scroll specific element
|
|
368
|
+
{
|
|
369
|
+
"action": "scroll",
|
|
370
|
+
"selector": "#scrollable-div",
|
|
371
|
+
"direction": "down",
|
|
372
|
+
"amount": 300,
|
|
373
|
+
"description": "Scroll container"
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
# Smooth scroll
|
|
377
|
+
{
|
|
378
|
+
"action": "scroll",
|
|
379
|
+
"direction": "top",
|
|
380
|
+
"smooth": True,
|
|
381
|
+
"description": "Smooth scroll to top"
|
|
382
|
+
}
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
### Data Extraction Actions
|
|
388
|
+
|
|
389
|
+
#### 8. GetText
|
|
390
|
+
|
|
391
|
+
Extract text content from elements.
|
|
392
|
+
|
|
393
|
+
**Parameters:**
|
|
394
|
+
- `action`: `"get_text"` (required)
|
|
395
|
+
- `selector`: CSS selector for elements (required)
|
|
396
|
+
- `extract_type`: Extraction type: `"text"`, `"attribute"`, `"html"` (default: `"text"`)
|
|
397
|
+
- `attribute`: Attribute name if extract_type is `"attribute"` (optional)
|
|
398
|
+
- `multiple`: Extract from multiple elements (default: False)
|
|
399
|
+
- `wait_for`: Wait for element to appear (default: True)
|
|
400
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
401
|
+
- `description`: Human-readable description (optional)
|
|
402
|
+
|
|
403
|
+
**Examples:**
|
|
404
|
+
|
|
405
|
+
```python
|
|
406
|
+
# Extract text from single element
|
|
407
|
+
{
|
|
408
|
+
"action": "get_text",
|
|
409
|
+
"selector": "h1.title",
|
|
410
|
+
"description": "Get page title"
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
# Extract from multiple elements
|
|
414
|
+
{
|
|
415
|
+
"action": "get_text",
|
|
416
|
+
"selector": ".product-name",
|
|
417
|
+
"multiple": True,
|
|
418
|
+
"description": "Get all product names"
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
# Extract attribute
|
|
422
|
+
{
|
|
423
|
+
"action": "get_text",
|
|
424
|
+
"selector": "a.download-link",
|
|
425
|
+
"extract_type": "attribute",
|
|
426
|
+
"attribute": "href",
|
|
427
|
+
"description": "Get download URL"
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
# Extract HTML content
|
|
431
|
+
{
|
|
432
|
+
"action": "get_text",
|
|
433
|
+
"selector": ".article-content",
|
|
434
|
+
"extract_type": "html",
|
|
435
|
+
"description": "Get article HTML"
|
|
436
|
+
}
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
#### 9. GetHTML
|
|
440
|
+
|
|
441
|
+
Get the complete page HTML.
|
|
442
|
+
|
|
443
|
+
**Parameters:**
|
|
444
|
+
- `action`: `"get_html"` (required)
|
|
445
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
446
|
+
- `description`: Human-readable description (optional)
|
|
447
|
+
|
|
448
|
+
**Example:**
|
|
449
|
+
|
|
450
|
+
```python
|
|
451
|
+
{
|
|
452
|
+
"action": "get_html",
|
|
453
|
+
"description": "Get full page HTML"
|
|
454
|
+
}
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
---
|
|
458
|
+
|
|
459
|
+
### Cookie Management
|
|
460
|
+
|
|
461
|
+
#### 10. GetCookies
|
|
462
|
+
|
|
463
|
+
Retrieve browser cookies.
|
|
464
|
+
|
|
465
|
+
**Parameters:**
|
|
466
|
+
- `action`: `"get_cookies"` (required)
|
|
467
|
+
- `domain`: Filter cookies by domain (optional)
|
|
468
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
469
|
+
- `description`: Human-readable description (optional)
|
|
470
|
+
|
|
471
|
+
**Examples:**
|
|
472
|
+
|
|
473
|
+
```python
|
|
474
|
+
# Get all cookies
|
|
475
|
+
{
|
|
476
|
+
"action": "get_cookies",
|
|
477
|
+
"description": "Get all cookies"
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
# Get cookies for specific domain
|
|
481
|
+
{
|
|
482
|
+
"action": "get_cookies",
|
|
483
|
+
"domain": "example.com",
|
|
484
|
+
"description": "Get example.com cookies"
|
|
485
|
+
}
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
#### 11. SetCookies
|
|
489
|
+
|
|
490
|
+
Set browser cookies.
|
|
491
|
+
|
|
492
|
+
**Parameters:**
|
|
493
|
+
- `action`: `"set_cookies"` (required)
|
|
494
|
+
- `cookies`: List of cookie dictionaries (required)
|
|
495
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
496
|
+
- `description`: Human-readable description (optional)
|
|
497
|
+
|
|
498
|
+
**Example:**
|
|
499
|
+
|
|
500
|
+
```python
|
|
501
|
+
{
|
|
502
|
+
"action": "set_cookies",
|
|
503
|
+
"cookies": [
|
|
504
|
+
{
|
|
505
|
+
"name": "session_id",
|
|
506
|
+
"value": "abc123",
|
|
507
|
+
"domain": "example.com",
|
|
508
|
+
"path": "/",
|
|
509
|
+
"secure": True
|
|
510
|
+
},
|
|
511
|
+
{
|
|
512
|
+
"name": "user_pref",
|
|
513
|
+
"value": "dark_mode",
|
|
514
|
+
"domain": "example.com"
|
|
515
|
+
}
|
|
516
|
+
],
|
|
517
|
+
"description": "Set authentication cookies"
|
|
518
|
+
}
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
---
|
|
522
|
+
|
|
523
|
+
### Authentication
|
|
524
|
+
|
|
525
|
+
#### 12. Authenticate
|
|
526
|
+
|
|
527
|
+
Perform HTTP authentication.
|
|
528
|
+
|
|
529
|
+
**Parameters:**
|
|
530
|
+
- `action`: `"authenticate"` (required)
|
|
531
|
+
- `username`: Username for authentication (required)
|
|
532
|
+
- `password`: Password for authentication (required)
|
|
533
|
+
- `auth_type`: Authentication type: `"basic"`, `"digest"`, `"ntlm"` (default: `"basic"`)
|
|
534
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
535
|
+
- `description`: Human-readable description (optional)
|
|
536
|
+
|
|
537
|
+
**Example:**
|
|
538
|
+
|
|
539
|
+
```python
|
|
540
|
+
{
|
|
541
|
+
"action": "authenticate",
|
|
542
|
+
"username": "admin",
|
|
543
|
+
"password": "secret123",
|
|
544
|
+
"auth_type": "basic",
|
|
545
|
+
"description": "HTTP Basic Auth"
|
|
546
|
+
}
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
---
|
|
550
|
+
|
|
551
|
+
### Waiting Actions
|
|
552
|
+
|
|
553
|
+
#### 13. Wait
|
|
554
|
+
|
|
555
|
+
Wait for a specified duration or condition.
|
|
556
|
+
|
|
557
|
+
**Parameters:**
|
|
558
|
+
- `action`: `"wait"` (required)
|
|
559
|
+
- `duration`: Wait duration in seconds (optional)
|
|
560
|
+
- `condition`: Wait condition: `"element"`, `"element_visible"`, `"element_hidden"`, `"url_contains"`, `"title_contains"` (optional)
|
|
561
|
+
- `selector`: CSS selector of the element to wait for; used with the `"element"`, `"element_visible"`, and `"element_hidden"` conditions (optional)
|
|
562
|
+
- `value`: Text to look for; used with the `"url_contains"` and `"title_contains"` conditions (optional)
|
|
563
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
564
|
+
- `description`: Human-readable description (optional)
|
|
565
|
+
|
|
566
|
+
**Examples:**
|
|
567
|
+
|
|
568
|
+
```python
|
|
569
|
+
# Wait for duration
|
|
570
|
+
{
|
|
571
|
+
"action": "wait",
|
|
572
|
+
"duration": 3,
|
|
573
|
+
"description": "Wait 3 seconds"
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
# Wait for element
|
|
577
|
+
{
|
|
578
|
+
"action": "wait",
|
|
579
|
+
"condition": "element",
|
|
580
|
+
"selector": ".loading-complete",
|
|
581
|
+
"timeout": 10,
|
|
582
|
+
"description": "Wait for loading to complete"
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
# Wait for element to be visible
|
|
586
|
+
{
|
|
587
|
+
"action": "wait",
|
|
588
|
+
"condition": "element_visible",
|
|
589
|
+
"selector": "#content",
|
|
590
|
+
"timeout": 5,
|
|
591
|
+
"description": "Wait for content to appear"
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
# Wait for URL change
|
|
595
|
+
{
|
|
596
|
+
"action": "wait",
|
|
597
|
+
"condition": "url_contains",
|
|
598
|
+
"value": "dashboard",
|
|
599
|
+
"timeout": 10,
|
|
600
|
+
"description": "Wait for redirect to dashboard"
|
|
601
|
+
}
|
|
602
|
+
```
|
|
603
|
+
|
|
604
|
+
#### 14. AwaitHuman
|
|
605
|
+
|
|
606
|
+
Pause for human intervention with optional message display.
|
|
607
|
+
|
|
608
|
+
**Parameters:**
|
|
609
|
+
- `action`: `"await_human"` (required)
|
|
610
|
+
- `message`: Message to display (optional)
|
|
611
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
612
|
+
- `description`: Human-readable description (optional)
|
|
613
|
+
|
|
614
|
+
**Example:**
|
|
615
|
+
|
|
616
|
+
```python
|
|
617
|
+
{
|
|
618
|
+
"action": "await_human",
|
|
619
|
+
"message": "Please complete the CAPTCHA and press Enter to continue",
|
|
620
|
+
"timeout": 300,
|
|
621
|
+
"description": "Wait for CAPTCHA resolution"
|
|
622
|
+
}
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
#### 15. AwaitKeyPress
|
|
626
|
+
|
|
627
|
+
Wait for a specific keyboard input.
|
|
628
|
+
|
|
629
|
+
**Parameters:**
|
|
630
|
+
- `action`: `"await_keypress"` (required)
|
|
631
|
+
- `expected_key`: Key to wait for (default: `"enter"`)
|
|
632
|
+
- `message`: Message to display (optional)
|
|
633
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
634
|
+
- `description`: Human-readable description (optional)
|
|
635
|
+
|
|
636
|
+
**Example:**
|
|
637
|
+
|
|
638
|
+
```python
|
|
639
|
+
{
|
|
640
|
+
"action": "await_keypress",
|
|
641
|
+
"expected_key": "enter",
|
|
642
|
+
"message": "Review the data and press Enter to continue",
|
|
643
|
+
"description": "Wait for user confirmation"
|
|
644
|
+
}
|
|
645
|
+
```
|
|
646
|
+
|
|
647
|
+
#### 16. AwaitBrowserEvent
|
|
648
|
+
|
|
649
|
+
Wait for specific browser events.
|
|
650
|
+
|
|
651
|
+
**Parameters:**
|
|
652
|
+
- `action`: `"await_browser_event"` (required)
|
|
653
|
+
- `event_type`: Event type: `"load"`, `"domcontentloaded"`, `"networkidle"`, `"popup"`, `"dialog"` (required)
|
|
654
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
655
|
+
- `description`: Human-readable description (optional)
|
|
656
|
+
|
|
657
|
+
**Examples:**
|
|
658
|
+
|
|
659
|
+
```python
|
|
660
|
+
# Wait for page load
|
|
661
|
+
{
|
|
662
|
+
"action": "await_browser_event",
|
|
663
|
+
"event_type": "load",
|
|
664
|
+
"timeout": 30,
|
|
665
|
+
"description": "Wait for page fully loaded"
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
# Wait for network idle
|
|
669
|
+
{
|
|
670
|
+
"action": "await_browser_event",
|
|
671
|
+
"event_type": "networkidle",
|
|
672
|
+
"timeout": 10,
|
|
673
|
+
"description": "Wait for AJAX requests to complete"
|
|
674
|
+
}
|
|
675
|
+
```
|
|
676
|
+
|
|
677
|
+
---
|
|
678
|
+
|
|
679
|
+
### File Operations
|
|
680
|
+
|
|
681
|
+
#### 17. Screenshot
|
|
682
|
+
|
|
683
|
+
Capture a screenshot.
|
|
684
|
+
|
|
685
|
+
**Parameters:**
|
|
686
|
+
- `action`: `"screenshot"` (required)
|
|
687
|
+
- `filepath`: Path to save screenshot (required)
|
|
688
|
+
- `full_page`: Capture full page (default: False)
|
|
689
|
+
- `selector`: CSS selector to capture specific element (optional)
|
|
690
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
691
|
+
- `description`: Human-readable description (optional)
|
|
692
|
+
|
|
693
|
+
**Examples:**
|
|
694
|
+
|
|
695
|
+
```python
|
|
696
|
+
# Screenshot viewport
|
|
697
|
+
{
|
|
698
|
+
"action": "screenshot",
|
|
699
|
+
"filepath": "./screenshots/page.png",
|
|
700
|
+
"description": "Capture viewport"
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
# Full page screenshot
|
|
704
|
+
{
|
|
705
|
+
"action": "screenshot",
|
|
706
|
+
"filepath": "./screenshots/full_page.png",
|
|
707
|
+
"full_page": True,
|
|
708
|
+
"description": "Capture full page"
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
# Element screenshot
|
|
712
|
+
{
|
|
713
|
+
"action": "screenshot",
|
|
714
|
+
"filepath": "./screenshots/element.png",
|
|
715
|
+
"selector": "#main-content",
|
|
716
|
+
"description": "Capture specific element"
|
|
717
|
+
}
|
|
718
|
+
```
|
|
719
|
+
|
|
720
|
+
#### 18. UploadFile
|
|
721
|
+
|
|
722
|
+
Upload a file to an input field.
|
|
723
|
+
|
|
724
|
+
**Parameters:**
|
|
725
|
+
- `action`: `"upload_file"` (required)
|
|
726
|
+
- `selector`: CSS selector for file input (required)
|
|
727
|
+
- `filepath`: Path to file to upload (required)
|
|
728
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
729
|
+
- `description`: Human-readable description (optional)
|
|
730
|
+
|
|
731
|
+
**Example:**
|
|
732
|
+
|
|
733
|
+
```python
|
|
734
|
+
{
|
|
735
|
+
"action": "upload_file",
|
|
736
|
+
"selector": "input[type='file']",
|
|
737
|
+
"filepath": "./documents/resume.pdf",
|
|
738
|
+
"description": "Upload resume"
|
|
739
|
+
}
|
|
740
|
+
```
|
|
741
|
+
|
|
742
|
+
#### 19. WaitForDownload
|
|
743
|
+
|
|
744
|
+
Monitor and wait for file download completion.
|
|
745
|
+
|
|
746
|
+
**Parameters:**
|
|
747
|
+
- `action`: `"wait_for_download"` (required)
|
|
748
|
+
- `download_dir`: Directory to monitor for downloads (required)
|
|
749
|
+
- `expected_filename`: Expected filename or wildcard pattern, e.g. `"report_*.pdf"` (optional)
|
|
750
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
751
|
+
- `description`: Human-readable description (optional)
|
|
752
|
+
|
|
753
|
+
**Example:**
|
|
754
|
+
|
|
755
|
+
```python
|
|
756
|
+
{
|
|
757
|
+
"action": "wait_for_download",
|
|
758
|
+
"download_dir": "./downloads",
|
|
759
|
+
"expected_filename": "report_*.pdf",
|
|
760
|
+
"timeout": 60,
|
|
761
|
+
"description": "Wait for report download"
|
|
762
|
+
}
|
|
763
|
+
```
|
|
764
|
+
|
|
765
|
+
---
|
|
766
|
+
|
|
767
|
+
### Advanced Actions
|
|
768
|
+
|
|
769
|
+
#### 20. Evaluate
|
|
770
|
+
|
|
771
|
+
Execute JavaScript code in the browser context.
|
|
772
|
+
|
|
773
|
+
**Parameters:**
|
|
774
|
+
- `action`: `"evaluate"` (required)
|
|
775
|
+
- `script`: JavaScript code to execute (required)
|
|
776
|
+
- `args`: Arguments to pass to the script (optional)
|
|
777
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
778
|
+
- `description`: Human-readable description (optional)
|
|
779
|
+
|
|
780
|
+
**Examples:**
|
|
781
|
+
|
|
782
|
+
```python
|
|
783
|
+
# Simple evaluation
|
|
784
|
+
{
|
|
785
|
+
"action": "evaluate",
|
|
786
|
+
"script": "return document.title;",
|
|
787
|
+
"description": "Get page title via JS"
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
# Complex evaluation with arguments
|
|
791
|
+
{
|
|
792
|
+
"action": "evaluate",
|
|
793
|
+
"script": "return arguments[0] + arguments[1];",
|
|
794
|
+
"args": [5, 10],
|
|
795
|
+
"description": "Calculate sum"
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
# Modify page content
|
|
799
|
+
{
|
|
800
|
+
"action": "evaluate",
|
|
801
|
+
"script": "document.querySelector('#banner').style.display = 'none';",
|
|
802
|
+
"description": "Hide banner"
|
|
803
|
+
}
|
|
804
|
+
```
|
|
805
|
+
|
|
806
|
+
#### 21. Loop
|
|
807
|
+
|
|
808
|
+
Execute a sequence of actions repeatedly.
|
|
809
|
+
|
|
810
|
+
**Parameters:**
|
|
811
|
+
- `action`: `"loop"` (required)
|
|
812
|
+
- `actions`: List of actions to execute in loop (required)
|
|
813
|
+
- `iterations`: Number of iterations (optional; may be omitted when `values` or `condition` is provided)
|
|
814
|
+
- `condition`: JavaScript condition to continue looping (optional)
|
|
815
|
+
- `values`: List of values to iterate over (optional)
|
|
816
|
+
- `value_name`: Variable name for current value (default: `"value"`)
|
|
817
|
+
- `break_on_error`: Stop loop on error (default: True)
|
|
818
|
+
- `max_iterations`: Safety limit (default: 100)
|
|
819
|
+
- `start_index`: Starting index (default: 0)
|
|
820
|
+
- `do_replace`: Substitute the `{{index}}` and `{{index_1}}` placeholders in the parameters of the looped actions (default: True)
|
|
821
|
+
- `timeout`: Maximum wait time in seconds (optional)
|
|
822
|
+
- `description`: Human-readable description (optional)
|
|
823
|
+
|
|
824
|
+
**Examples:**
|
|
825
|
+
|
|
826
|
+
```python
|
|
827
|
+
# Fixed iteration loop
|
|
828
|
+
{
|
|
829
|
+
"action": "loop",
|
|
830
|
+
"iterations": 5,
|
|
831
|
+
"actions": [
|
|
832
|
+
{
|
|
833
|
+
"action": "click",
|
|
834
|
+
"selector": ".load-more"
|
|
835
|
+
},
|
|
836
|
+
{
|
|
837
|
+
"action": "wait",
|
|
838
|
+
"duration": 2
|
|
839
|
+
}
|
|
840
|
+
],
|
|
841
|
+
"description": "Load more content 5 times"
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
# Condition-based loop
|
|
845
|
+
{
|
|
846
|
+
"action": "loop",
|
|
847
|
+
"condition": "document.querySelector('.next-page') !== null",
|
|
848
|
+
"actions": [
|
|
849
|
+
{
|
|
850
|
+
"action": "click",
|
|
851
|
+
"selector": ".next-page"
|
|
852
|
+
},
|
|
853
|
+
{
|
|
854
|
+
"action": "wait",
|
|
855
|
+
"condition": "element",
|
|
856
|
+
"selector": ".content-loaded"
|
|
857
|
+
}
|
|
858
|
+
],
|
|
859
|
+
"max_iterations": 20,
|
|
860
|
+
"description": "Navigate through all pages"
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
# Value iteration with index replacement
|
|
864
|
+
{
|
|
865
|
+
"action": "loop",
|
|
866
|
+
"values": ["apple", "banana", "cherry"],
|
|
867
|
+
"value_name": "fruit",
|
|
868
|
+
"actions": [
|
|
869
|
+
{
|
|
870
|
+
"action": "fill",
|
|
871
|
+
"selector": "#search-{{index}}",
|
|
872
|
+
"value": "{{fruit}}"
|
|
873
|
+
},
|
|
874
|
+
{
|
|
875
|
+
"action": "click",
|
|
876
|
+
"selector": "#submit-{{index_1}}"
|
|
877
|
+
}
|
|
878
|
+
],
|
|
879
|
+
"description": "Search for each fruit"
|
|
880
|
+
}
|
|
881
|
+
```
|
|
882
|
+
|
|
883
|
+
---
|
|
884
|
+
|
|
885
|
+
## Complete Usage Examples
|
|
886
|
+
|
|
887
|
+
### Example 1: Basic Login Flow
|
|
888
|
+
|
|
889
|
+
```python
|
|
890
|
+
from aiparrot.tools import WebScrapingTool
|
|
891
|
+
|
|
892
|
+
tool = WebScrapingTool(browser='chrome', headless=False)
|
|
893
|
+
|
|
894
|
+
steps = [
|
|
895
|
+
{
|
|
896
|
+
"action": "navigate",
|
|
897
|
+
"url": "https://example.com/login",
|
|
898
|
+
"description": "Go to login page"
|
|
899
|
+
},
|
|
900
|
+
{
|
|
901
|
+
"action": "fill",
|
|
902
|
+
"selector": "#username",
|
|
903
|
+
"value": "user@example.com",
|
|
904
|
+
"description": "Enter username"
|
|
905
|
+
},
|
|
906
|
+
{
|
|
907
|
+
"action": "fill",
|
|
908
|
+
"selector": "#password",
|
|
909
|
+
"value": "secret123",
|
|
910
|
+
"description": "Enter password"
|
|
911
|
+
},
|
|
912
|
+
{
|
|
913
|
+
"action": "click",
|
|
914
|
+
"selector": "#login-button",
|
|
915
|
+
"wait_after_click": ".dashboard",
|
|
916
|
+
"description": "Click login and wait for dashboard"
|
|
917
|
+
}
|
|
918
|
+
]
|
|
919
|
+
|
|
920
|
+
result = await tool.execute(steps=steps)
|
|
921
|
+
```
|
|
922
|
+
|
|
923
|
+
### Example 2: E-commerce Product Scraping
|
|
924
|
+
|
|
925
|
+
```python
|
|
926
|
+
steps = [
|
|
927
|
+
{
|
|
928
|
+
"action": "navigate",
|
|
929
|
+
"url": "https://shop.example.com/products",
|
|
930
|
+
"description": "Navigate to products page"
|
|
931
|
+
},
|
|
932
|
+
{
|
|
933
|
+
"action": "wait",
|
|
934
|
+
"condition": "element",
|
|
935
|
+
"selector": ".product-list",
|
|
936
|
+
"description": "Wait for products to load"
|
|
937
|
+
},
|
|
938
|
+
{
|
|
939
|
+
"action": "loop",
|
|
940
|
+
"iterations": 3,
|
|
941
|
+
"actions": [
|
|
942
|
+
{
|
|
943
|
+
"action": "scroll",
|
|
944
|
+
"direction": "down",
|
|
945
|
+
"amount": 1000
|
|
946
|
+
},
|
|
947
|
+
{
|
|
948
|
+
"action": "wait",
|
|
949
|
+
"duration": 2
|
|
950
|
+
}
|
|
951
|
+
],
|
|
952
|
+
"description": "Scroll to load more products"
|
|
953
|
+
},
|
|
954
|
+
{
|
|
955
|
+
"action": "get_text",
|
|
956
|
+
"selector": ".product-name",
|
|
957
|
+
"multiple": True,
|
|
958
|
+
"description": "Extract all product names"
|
|
959
|
+
},
|
|
960
|
+
{
|
|
961
|
+
"action": "get_text",
|
|
962
|
+
"selector": ".product-price",
|
|
963
|
+
"multiple": True,
|
|
964
|
+
"description": "Extract all prices"
|
|
965
|
+
}
|
|
966
|
+
]
|
|
967
|
+
|
|
968
|
+
selectors = [
|
|
969
|
+
{
|
|
970
|
+
"name": "products",
|
|
971
|
+
"selector": ".product-card",
|
|
972
|
+
"extract_type": "html",
|
|
973
|
+
"multiple": True
|
|
974
|
+
}
|
|
975
|
+
]
|
|
976
|
+
|
|
977
|
+
result = await tool.execute(steps=steps, selectors=selectors)
|
|
978
|
+
```
|
|
979
|
+
|
|
980
|
+
### Example 3: Form Automation with File Upload
|
|
981
|
+
|
|
982
|
+
```python
|
|
983
|
+
steps = [
|
|
984
|
+
{
|
|
985
|
+
"action": "navigate",
|
|
986
|
+
"url": "https://example.com/application",
|
|
987
|
+
"description": "Go to application form"
|
|
988
|
+
},
|
|
989
|
+
{
|
|
990
|
+
"action": "fill",
|
|
991
|
+
"selector": "#name",
|
|
992
|
+
"value": "John Doe",
|
|
993
|
+
"description": "Enter name"
|
|
994
|
+
},
|
|
995
|
+
{
|
|
996
|
+
"action": "fill",
|
|
997
|
+
"selector": "#email",
|
|
998
|
+
"value": "john@example.com",
|
|
999
|
+
"description": "Enter email"
|
|
1000
|
+
},
|
|
1001
|
+
{
|
|
1002
|
+
"action": "upload_file",
|
|
1003
|
+
"selector": "input[type='file']",
|
|
1004
|
+
"filepath": "./documents/cv.pdf",
|
|
1005
|
+
"description": "Upload CV"
|
|
1006
|
+
},
|
|
1007
|
+
{
|
|
1008
|
+
"action": "click",
|
|
1009
|
+
"selector": "#submit",
|
|
1010
|
+
"description": "Submit form"
|
|
1011
|
+
},
|
|
1012
|
+
{
|
|
1013
|
+
"action": "wait",
|
|
1014
|
+
"condition": "element",
|
|
1015
|
+
"selector": ".success-message",
|
|
1016
|
+
"timeout": 10,
|
|
1017
|
+
"description": "Wait for confirmation"
|
|
1018
|
+
},
|
|
1019
|
+
{
|
|
1020
|
+
"action": "screenshot",
|
|
1021
|
+
"filepath": "./confirmation.png",
|
|
1022
|
+
"description": "Capture confirmation"
|
|
1023
|
+
}
|
|
1024
|
+
]
|
|
1025
|
+
|
|
1026
|
+
result = await tool.execute(steps=steps)
|
|
1027
|
+
```
|
|
1028
|
+
|
|
1029
|
+
### Example 4: Data Extraction with Pagination
|
|
1030
|
+
|
|
1031
|
+
```python
|
|
1032
|
+
steps = [
|
|
1033
|
+
{
|
|
1034
|
+
"action": "navigate",
|
|
1035
|
+
"url": "https://example.com/data",
|
|
1036
|
+
"description": "Navigate to data page"
|
|
1037
|
+
},
|
|
1038
|
+
{
|
|
1039
|
+
"action": "loop",
|
|
1040
|
+
"condition": "document.querySelector('.next-page:not(.disabled)') !== null",
|
|
1041
|
+
"max_iterations": 50,
|
|
1042
|
+
"actions": [
|
|
1043
|
+
{
|
|
1044
|
+
"action": "get_text",
|
|
1045
|
+
"selector": "table tr",
|
|
1046
|
+
"multiple": True,
|
|
1047
|
+
"extract_type": "html",
|
|
1048
|
+
"description": "Extract table rows"
|
|
1049
|
+
},
|
|
1050
|
+
{
|
|
1051
|
+
"action": "click",
|
|
1052
|
+
"selector": ".next-page",
|
|
1053
|
+
"wait_after_click": "table",
|
|
1054
|
+
"description": "Go to next page"
|
|
1055
|
+
},
|
|
1056
|
+
{
|
|
1057
|
+
"action": "wait",
|
|
1058
|
+
"duration": 1
|
|
1059
|
+
}
|
|
1060
|
+
],
|
|
1061
|
+
"description": "Extract data from all pages"
|
|
1062
|
+
}
|
|
1063
|
+
]
|
|
1064
|
+
|
|
1065
|
+
result = await tool.execute(steps=steps)
|
|
1066
|
+
```
|
|
1067
|
+
|
|
1068
|
+
### Example 5: Authenticated Session with Cookies
|
|
1069
|
+
|
|
1070
|
+
```python
|
|
1071
|
+
steps = [
|
|
1072
|
+
{
|
|
1073
|
+
"action": "navigate",
|
|
1074
|
+
"url": "https://example.com",
|
|
1075
|
+
"description": "Navigate to site"
|
|
1076
|
+
},
|
|
1077
|
+
{
|
|
1078
|
+
"action": "set_cookies",
|
|
1079
|
+
"cookies": [
|
|
1080
|
+
{
|
|
1081
|
+
"name": "auth_token",
|
|
1082
|
+
"value": "your_token_here",
|
|
1083
|
+
"domain": "example.com",
|
|
1084
|
+
"secure": True
|
|
1085
|
+
}
|
|
1086
|
+
],
|
|
1087
|
+
"description": "Set authentication cookies"
|
|
1088
|
+
},
|
|
1089
|
+
{
|
|
1090
|
+
"action": "refresh",
|
|
1091
|
+
"description": "Reload with authentication"
|
|
1092
|
+
},
|
|
1093
|
+
{
|
|
1094
|
+
"action": "wait",
|
|
1095
|
+
"condition": "element",
|
|
1096
|
+
"selector": ".user-profile",
|
|
1097
|
+
"description": "Wait for authenticated content"
|
|
1098
|
+
},
|
|
1099
|
+
{
|
|
1100
|
+
"action": "get_text",
|
|
1101
|
+
"selector": ".user-name",
|
|
1102
|
+
"description": "Get username"
|
|
1103
|
+
}
|
|
1104
|
+
]
|
|
1105
|
+
|
|
1106
|
+
result = await tool.execute(steps=steps)
|
|
1107
|
+
```
|
|
1108
|
+
|
|
1109
|
+
---
|
|
1110
|
+
|
|
1111
|
+
## Best Practices
|
|
1112
|
+
|
|
1113
|
+
### 1. Always Use Descriptions
|
|
1114
|
+
Add clear descriptions to each action for better debugging and logging:
|
|
1115
|
+
|
|
1116
|
+
```python
|
|
1117
|
+
{
|
|
1118
|
+
"action": "click",
|
|
1119
|
+
"selector": "#submit",
|
|
1120
|
+
"description": "Submit the registration form" # Good practice
|
|
1121
|
+
}
|
|
1122
|
+
```
|
|
1123
|
+
|
|
1124
|
+
### 2. Use Appropriate Waits
|
|
1125
|
+
Don't rely solely on fixed delays. Use conditional waits:
|
|
1126
|
+
|
|
1127
|
+
```python
|
|
1128
|
+
# Good
|
|
1129
|
+
{
|
|
1130
|
+
"action": "wait",
|
|
1131
|
+
"condition": "element_visible",
|
|
1132
|
+
"selector": ".results"
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
# Avoid when possible
|
|
1136
|
+
{
|
|
1137
|
+
"action": "wait",
|
|
1138
|
+
"duration": 5 # Fixed delays are fragile
|
|
1139
|
+
}
|
|
1140
|
+
```
|
|
1141
|
+
|
|
1142
|
+
### 3. Handle Dynamic Content
|
|
1143
|
+
For AJAX-heavy sites, wait for specific elements or network idle:
|
|
1144
|
+
|
|
1145
|
+
```python
|
|
1146
|
+
{
|
|
1147
|
+
"action": "await_browser_event",
|
|
1148
|
+
"event_type": "networkidle",
|
|
1149
|
+
"timeout": 10
|
|
1150
|
+
}
|
|
1151
|
+
```
|
|
1152
|
+
|
|
1153
|
+
### 4. Use Loops for Repetitive Tasks
|
|
1154
|
+
Instead of repeating actions, use loops:
|
|
1155
|
+
|
|
1156
|
+
```python
|
|
1157
|
+
{
|
|
1158
|
+
"action": "loop",
|
|
1159
|
+
"iterations": 10,
|
|
1160
|
+
"actions": [
|
|
1161
|
+
{"action": "click", "selector": ".load-more"},
|
|
1162
|
+
{"action": "wait", "duration": 2}
|
|
1163
|
+
]
|
|
1164
|
+
}
|
|
1165
|
+
```
|
|
1166
|
+
|
|
1167
|
+
### 5. Selector Best Practices
|
|
1168
|
+
- Prefer ID selectors: `#unique-id`
|
|
1169
|
+
- Use data attributes: `[data-testid='submit']`
|
|
1170
|
+
- Avoid brittle class names: `.btn-primary` may change
|
|
1171
|
+
- Use text selectors carefully: `selector_type: "text"`
|
|
1172
|
+
|
|
1173
|
+
### 6. Error Handling
|
|
1174
|
+
Set appropriate timeouts and use `break_on_error` in loops:
|
|
1175
|
+
|
|
1176
|
+
```python
|
|
1177
|
+
{
|
|
1178
|
+
"action": "loop",
|
|
1179
|
+
"iterations": 5,
|
|
1180
|
+
"break_on_error": True, # Stop on first error
|
|
1181
|
+
"actions": [...]
|
|
1182
|
+
}
|
|
1183
|
+
```
|
|
1184
|
+
|
|
1185
|
+
### 7. Screenshots for Debugging
|
|
1186
|
+
Capture screenshots at key points:
|
|
1187
|
+
|
|
1188
|
+
```python
|
|
1189
|
+
{
|
|
1190
|
+
"action": "screenshot",
|
|
1191
|
+
"filepath": "./debug/step_5.png",
|
|
1192
|
+
"description": "Debug screenshot after form submission"
|
|
1193
|
+
}
|
|
1194
|
+
```
|
|
1195
|
+
|
|
1196
|
+
---
|
|
1197
|
+
|
|
1198
|
+
## Error Handling
|
|
1199
|
+
|
|
1200
|
+
The tool includes automatic retry logic and error handling. Results are stored in the `results` attribute, and the value returned by `execute()` can be checked for an `error` entry:
|
|
1201
|
+
|
|
1202
|
+
```python
|
|
1203
|
+
tool = WebScrapingTool()
|
|
1204
|
+
result = await tool.execute(steps=steps)
|
|
1205
|
+
|
|
1206
|
+
# Check for errors
|
|
1207
|
+
if result.get("error"):
|
|
1208
|
+
print(f"Error: {result['error']}")
|
|
1209
|
+
else:
|
|
1210
|
+
print(f"Success: {result}")
|
|
1211
|
+
```
|
|
1212
|
+
|
|
1213
|
+
---
|
|
1214
|
+
|
|
1215
|
+
## Advanced Configuration
|
|
1216
|
+
|
|
1217
|
+
### Mobile Emulation
|
|
1218
|
+
|
|
1219
|
+
```python
|
|
1220
|
+
tool = WebScrapingTool(
|
|
1221
|
+
browser='chrome',
|
|
1222
|
+
mobile=True,
|
|
1223
|
+
mobile_device='iPhone 14 Pro Max'
|
|
1224
|
+
)
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
### Custom User Agent
|
|
1228
|
+
|
|
1229
|
+
```python
|
|
1230
|
+
tool = WebScrapingTool(
|
|
1231
|
+
browser='chrome',
|
|
1232
|
+
custom_user_agent='Mozilla/5.0 (Custom) ...'
|
|
1233
|
+
)
|
|
1234
|
+
```
|
|
1235
|
+
|
|
1236
|
+
### Disable Resources for Speed
|
|
1237
|
+
|
|
1238
|
+
```python
|
|
1239
|
+
tool = WebScrapingTool(
|
|
1240
|
+
browser='chrome',
|
|
1241
|
+
disable_images=True,
|
|
1242
|
+
disable_javascript=False # Usually keep JS enabled
|
|
1243
|
+
)
|
|
1244
|
+
```
|
|
1245
|
+
|
|
1246
|
+
### Undetected Chrome (Anti-bot)
|
|
1247
|
+
|
|
1248
|
+
```python
|
|
1249
|
+
tool = WebScrapingTool(
|
|
1250
|
+
browser='undetected',
|
|
1251
|
+
headless=False
|
|
1252
|
+
)
|
|
1253
|
+
```
|
|
1254
|
+
|
|
1255
|
+
---
|
|
1256
|
+
|
|
1257
|
+
## Integration with AI-Parrot
|
|
1258
|
+
|
|
1259
|
+
The WebScrapingTool integrates seamlessly with AI-Parrot's LLM capabilities for intelligent scraping workflows:
|
|
1260
|
+
|
|
1261
|
+
```python
|
|
1262
|
+
from aiparrot import Agent
|
|
1263
|
+
from aiparrot.tools import WebScrapingTool
|
|
1264
|
+
|
|
1265
|
+
scraper = WebScrapingTool(browser='chrome')
|
|
1266
|
+
|
|
1267
|
+
agent = Agent(
|
|
1268
|
+
name="WebScraperAgent",
|
|
1269
|
+
tools=[scraper],
|
|
1270
|
+
llm=your_llm_client
|
|
1271
|
+
)
|
|
1272
|
+
|
|
1273
|
+
response = await agent.run(
|
|
1274
|
+
"Navigate to example.com and extract all product information"
|
|
1275
|
+
)
|
|
1276
|
+
```
|
|
1277
|
+
|
|
1278
|
+
The LLM can generate the appropriate steps dynamically based on natural language instructions.
|