ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# IBISWorld Tool
|
|
2
|
+
|
|
3
|
+
A Parrot Tool for searching and extracting content from IBISWorld industry research articles.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
The IBISWorld Tool extends the Google Site Search functionality to provide specialized content extraction from IBISWorld.com. It searches within the IBISWorld domain and automatically extracts article content, statistics, tables, and metadata.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Site-Specific Search**: Searches exclusively within ibisworld.com using the Google Custom Search API
|
|
12
|
+
- **Content Extraction**: Automatically extracts article text, titles, and metadata (results served as PDF are converted to text with markitdown)
|
|
13
|
+
- **Table Extraction**: Captures data tables and structured information
|
|
14
|
+
- **Statistics Parsing**: Identifies and extracts key statistics and metrics
|
|
15
|
+
- **Flexible Options**: Choose whether to extract full content or just get search results
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
|
|
19
|
+
- Google Custom Search API credentials (GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID)
|
|
20
|
+
- BeautifulSoup4 for HTML parsing
|
|
21
|
+
- aiohttp for async HTTP requests
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
The tool is included in the ai-parrot framework. Ensure you have the required dependencies:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install beautifulsoup4 aiohttp lxml markitdown
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Usage
|
|
32
|
+
|
|
33
|
+
### Basic Usage
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import asyncio
|
|
37
|
+
from parrot.tools.ibisworld import IBISWorldTool
|
|
38
|
+
|
|
39
|
+
async def search_ibisworld():
|
|
40
|
+
tool = IBISWorldTool()
|
|
41
|
+
|
|
42
|
+
result = await tool.execute(
|
|
43
|
+
query="restaurant industry trends",
|
|
44
|
+
max_results=5,
|
|
45
|
+
extract_content=True,
|
|
46
|
+
include_tables=True
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
if result.status == "success":
|
|
50
|
+
for item in result.result['results']:
|
|
51
|
+
print(f"Title: {item['title']}")
|
|
52
|
+
print(f"URL: {item['link']}")
|
|
53
|
+
|
|
54
|
+
if 'extracted_content' in item:
|
|
55
|
+
content = item['extracted_content']
|
|
56
|
+
print(f"Content: {content['content'][:200]}...")
|
|
57
|
+
print(f"Tables found: {len(content['tables'])}")
|
|
58
|
+
|
|
59
|
+
asyncio.run(search_ibisworld())
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Quick Search (No Content Extraction)
|
|
63
|
+
|
|
64
|
+
For faster results when you only need search result links:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
result = await tool.execute(
|
|
68
|
+
query="automotive manufacturing",
|
|
69
|
+
max_results=10,
|
|
70
|
+
extract_content=False # Skip content extraction
|
|
71
|
+
)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### With Table Extraction
|
|
75
|
+
|
|
76
|
+
Extract structured data from tables in articles:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
result = await tool.execute(
|
|
80
|
+
query="healthcare industry statistics",
|
|
81
|
+
max_results=3,
|
|
82
|
+
extract_content=True,
|
|
83
|
+
include_tables=True # Extract tables from articles
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
for item in result.result['results']:
|
|
87
|
+
if 'extracted_content' in item:
|
|
88
|
+
tables = item['extracted_content']['tables']
|
|
89
|
+
for table in tables:
|
|
90
|
+
print(f"Table with {len(table['rows'])} rows")
|
|
91
|
+
print(f"Headers: {table['headers']}")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Arguments
|
|
95
|
+
|
|
96
|
+
### IBISWorldSearchArgs
|
|
97
|
+
|
|
98
|
+
- **query** (str, required): Search query for IBISWorld content
|
|
99
|
+
- **max_results** (int, optional): Maximum number of results to return (1-10, default: 5)
|
|
100
|
+
- **extract_content** (bool, optional): Extract full article content (default: True)
|
|
101
|
+
- **include_tables** (bool, optional): Include tables and structured data (default: True)
|
|
102
|
+
|
|
103
|
+
## Response Structure
|
|
104
|
+
|
|
105
|
+
### Success Response
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
{
|
|
109
|
+
'status': 'success',
|
|
110
|
+
'result': {
|
|
111
|
+
'query': 'original search query',
|
|
112
|
+
'site': 'ibisworld.com',
|
|
113
|
+
'search_query': 'original search query site:ibisworld.com',
|
|
114
|
+
'total_results': 5,
|
|
115
|
+
'source': 'IBISWorld',
|
|
116
|
+
'domain': 'ibisworld.com',
|
|
117
|
+
'content_extracted': True,
|
|
118
|
+
'results': [
|
|
119
|
+
{
|
|
120
|
+
'title': 'Article Title',
|
|
121
|
+
'link': 'https://ibisworld.com/...',
|
|
122
|
+
'snippet': 'Search result snippet',
|
|
123
|
+
'description': 'Search result description',
|
|
124
|
+
'has_content': True,
|
|
125
|
+
'extracted_content': {
|
|
126
|
+
'url': 'https://ibisworld.com/...',
|
|
127
|
+
'title': 'Full Article Title',
|
|
128
|
+
'content': 'Full article text...',
|
|
129
|
+
'metadata': {
|
|
130
|
+
'publication_date': '2024-01-15',
|
|
131
|
+
'author': 'Author Name',
|
|
132
|
+
# ... other meta tags
|
|
133
|
+
},
|
|
134
|
+
'tables': [
|
|
135
|
+
{
|
|
136
|
+
'table_id': 1,
|
|
137
|
+
'headers': ['Column 1', 'Column 2'],
|
|
138
|
+
'rows': [
|
|
139
|
+
['Data 1', 'Data 2'],
|
|
140
|
+
['Data 3', 'Data 4']
|
|
141
|
+
]
|
|
142
|
+
}
|
|
143
|
+
],
|
|
144
|
+
'statistics': {
|
|
145
|
+
'Market Size': '$XX billion',
|
|
146
|
+
'Growth Rate': 'X.X%',
|
|
147
|
+
# ... other statistics
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
]
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Content Extraction Details
|
|
157
|
+
|
|
158
|
+
The tool extracts content using multiple strategies:
|
|
159
|
+
|
|
160
|
+
1. **Title Extraction**: Searches for article titles using common HTML patterns
|
|
161
|
+
2. **Main Content**: Identifies article content containers and extracts paragraphs
|
|
162
|
+
3. **Metadata**: Extracts publication dates, authors, and meta tags
|
|
163
|
+
4. **Tables**: Parses HTML tables with headers and data rows
|
|
164
|
+
5. **Statistics**: Identifies key-value pairs and numeric data
|
|
165
|
+
|
|
166
|
+
## Integration with Agents
|
|
167
|
+
|
|
168
|
+
Use with conversational agents or LLM-based systems:
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from parrot.agents import Agent
|
|
172
|
+
from parrot.tools.ibisworld import IBISWorldTool
|
|
173
|
+
|
|
174
|
+
agent = Agent(
|
|
175
|
+
name="Industry Researcher",
|
|
176
|
+
tools=[IBISWorldTool()],
|
|
177
|
+
instructions="Research industry trends using IBISWorld"
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
response = await agent.run(
|
|
181
|
+
"What are the latest trends in the restaurant industry?"
|
|
182
|
+
)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Error Handling
|
|
186
|
+
|
|
187
|
+
The tool handles common errors gracefully:
|
|
188
|
+
|
|
189
|
+
- HTTP errors (403, 404, 500, etc.)
|
|
190
|
+
- Timeout errors
|
|
191
|
+
- Parsing errors
|
|
192
|
+
- Missing content
|
|
193
|
+
|
|
194
|
+
Errors are logged and included in the response:
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
{
|
|
198
|
+
'error': 'HTTP 403',
|
|
199
|
+
'content': None
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Limitations
|
|
204
|
+
|
|
205
|
+
- Requires valid Google Custom Search API credentials
|
|
206
|
+
- Subject to Google API rate limits and quotas
|
|
207
|
+
- Content extraction accuracy depends on IBISWorld's HTML structure
|
|
208
|
+
- Some content may be behind paywalls or require authentication
|
|
209
|
+
|
|
210
|
+
## Configuration
|
|
211
|
+
|
|
212
|
+
Set environment variables or configure in your application:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
export GOOGLE_SEARCH_API_KEY="your-api-key"
|
|
216
|
+
export GOOGLE_SEARCH_ENGINE_ID="your-cse-id"
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Examples
|
|
220
|
+
|
|
221
|
+
See `examples/tools/ibisworld.py` for complete working examples (this path is not part of the installed wheel, so look for it in the project's source checkout).
|
|
222
|
+
|
|
223
|
+
## License
|
|
224
|
+
|
|
225
|
+
Part of the AI-Parrot framework.
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""
|
|
2
|
+
IBISWorld Tool for AI-Parrot
|
|
3
|
+
Search and extract content from IBISWorld industry research articles.
|
|
4
|
+
"""
|
|
5
|
+
from typing import Dict, Any
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import tempfile
|
|
8
|
+
import aiohttp
|
|
9
|
+
from bs4 import BeautifulSoup
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
from markitdown import MarkItDown
|
|
12
|
+
from ..google.tools import GoogleSiteSearchTool, GoogleSiteSearchArgs
|
|
13
|
+
from ..abstract import AbstractTool
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class IBISWorldSearchArgs(BaseModel):
    """Arguments schema for IBISWorld Search Tool."""

    # Required: no default is supplied, so callers must always pass a query.
    query: str = Field(description="Search query for IBISWorld content")

    # Result cap; validation rejects values outside the inclusive range 1..10.
    max_results: int = Field(
        description="Maximum number of results to return",
        default=5,
        le=10,
        ge=1,
    )

    # Toggle for pulling the full article body for each result (on by default).
    extract_content: bool = Field(
        description="If True, extract full article content from each result",
        default=True,
    )

    # Toggle for including tables/structured data from articles (on by default).
    include_tables: bool = Field(
        description="If True, include tables and structured data from articles",
        default=True,
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class IBISWorldTool(GoogleSiteSearchTool):
    """
    IBISWorld search and content extraction tool.

    Searches within ibisworld.com and extracts industry research content,
    including article text, statistics, and tables.

    The search itself is delegated to the parent ``GoogleSiteSearchTool``;
    this class adds per-result download and parsing (HTML via BeautifulSoup,
    PDF via markitdown).
    """

    name = "ibisworld_search"
    description = "Search IBISWorld industry research and extract detailed content from articles"
    args_schema = IBISWorldSearchArgs

    # IBISWorld specific configuration
    IBISWORLD_DOMAIN = "ibisworld.com"

    @staticmethod
    def _convert_pdf_bytes(pdf_content: bytes) -> str:
        """Convert raw PDF bytes to text using markitdown.

        The bytes are written to a temporary ``.pdf`` file because the
        converter is invoked with a file path. The file is always removed,
        including when writing or conversion raises.

        Args:
            pdf_content: Raw bytes of the downloaded PDF.

        Returns:
            The ``text_content`` attribute of the markitdown result (may be
            empty when nothing could be extracted).
        """
        tmp_file = tempfile.NamedTemporaryFile(mode='wb', suffix='.pdf', delete=False)
        tmp_file_path = Path(tmp_file.name)
        try:
            # Close the handle before converting so the converter can reopen
            # the file by name.
            with tmp_file:
                tmp_file.write(pdf_content)

            # NOTE(review): convert() is a synchronous call made from an async
            # code path; large PDFs may stall the event loop.
            result = MarkItDown().convert(str(tmp_file_path))
            return result.text_content
        finally:
            # Clean up even on failure so temp files do not accumulate.
            tmp_file_path.unlink(missing_ok=True)

    async def _extract_article_content(
        self,
        url: str,
        include_tables: bool = True
    ) -> Dict[str, Any]:
        """
        Extract content from an IBISWorld article.

        Downloads ``url`` and parses it either as a PDF (when the URL ends in
        ``.pdf`` or the response Content-Type contains ``application/pdf``)
        or as HTML.

        Args:
            url: Article URL
            include_tables: Whether to extract table data (HTML only; the PDF
                branch always returns an empty ``tables`` list)

        Returns:
            Dictionary containing extracted content. On failure the dictionary
            has an ``error`` message and ``content`` set to ``None``; this
            method does not raise.
        """
        try:
            timeout = aiohttp.ClientTimeout(total=30)
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                              '(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
            }

            async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        return {
                            'error': f'HTTP {response.status}',
                            'content': None
                        }

                    # Check if content is a PDF
                    content_type = response.headers.get('Content-Type', '').lower()
                    is_pdf = url.lower().endswith('.pdf') or 'application/pdf' in content_type

                    if is_pdf:
                        # Handle PDF content using markitdown
                        try:
                            pdf_content = await response.read()
                            text_content = self._convert_pdf_bytes(pdf_content)

                            # Return PDF content in a structured format
                            return {
                                'url': url,
                                'title': self._extract_pdf_title(url),
                                'content': text_content if text_content else "PDF content could not be extracted",
                                'metadata': {'content_type': 'application/pdf', 'source': 'markitdown'},
                                'statistics': {},
                                'tables': []
                            }

                        except Exception as pdf_error:
                            self.logger.error(f"Error extracting PDF content from {url}: {pdf_error}")
                            return {
                                'error': f'PDF extraction error: {str(pdf_error)}',
                                'content': None
                            }

                    # Handle regular HTML content
                    html = await response.text()
                    soup = BeautifulSoup(html, 'html.parser')

                    # Extract article content
                    content_data = {
                        'url': url,
                        'title': self._extract_title(soup),
                        'content': self._extract_main_content(soup),
                        'metadata': self._extract_metadata(soup),
                    }

                    if include_tables:
                        content_data['tables'] = self._extract_tables(soup)

                    # Extract key statistics if available
                    content_data['statistics'] = self._extract_statistics(soup)

                    return content_data

        except Exception as e:
            # Best-effort: one failing URL must not abort the whole search.
            self.logger.error(f"Error extracting content from {url}: {e}")
            return {
                'error': str(e),
                'content': None
            }

    def _extract_pdf_title(self, url: str) -> str:
        """Derive a human-readable title from a PDF URL.

        Only the path component of the URL is considered, so query strings and
        fragments never leak into the title. Percent-escapes are decoded and a
        trailing ``.pdf`` extension is removed regardless of case.

        Args:
            url: URL of the PDF document.

        Returns:
            The file name with hyphens/underscores turned into spaces and each
            word capitalized (empty string when the path has no file name).
        """
        from urllib.parse import unquote, urlsplit

        # Get the filename from the URL path (ignores ?query and #fragment)
        filename = unquote(urlsplit(url).path.rsplit('/', 1)[-1])
        # Remove the extension case-insensitively, matching the PDF detection
        if filename.lower().endswith('.pdf'):
            filename = filename[:-len('.pdf')]
        # Convert hyphens/underscores to spaces and capitalize words
        title = filename.replace('-', ' ').replace('_', ' ')
        return title.title()

    def _extract_title(self, soup: BeautifulSoup) -> str:
        """Extract article title.

        Tries progressively less specific selectors and returns the text of
        the first element found, or ``"Title not found"`` when none match.
        """
        # Try multiple title selectors, most specific first
        title_selectors = [
            ('h1', {'class': 'article-title'}),
            ('h1', {'class': 'report-title'}),
            ('h1', {}),
            ('title', {}),
        ]

        for tag, attrs in title_selectors:
            title_elem = soup.find(tag, attrs)
            if title_elem:
                return title_elem.get_text(strip=True)

        return "Title not found"

    def _extract_main_content(self, soup: BeautifulSoup) -> str:
        """Extract main article content.

        Collects paragraph and section text from every known content
        container. Each distinct text is kept once, in first-seen order, so
        nested containers that match several selectors do not produce
        duplicated paragraphs. When no container yields text, falls back to
        all ``<p>`` elements longer than 50 characters.

        Returns:
            The collected texts joined by blank lines, or
            ``"Content not found"`` when nothing was collected.
        """
        # Common content container selectors for IBISWorld
        content_selectors = [
            {'class': 'article-content'},
            {'class': 'report-content'},
            {'class': 'industry-report'},
            {'class': 'main-content'},
            {'id': 'content'},
            {'class': 'content'},
        ]

        content_parts = []
        seen = set()

        def add_text(text: str) -> None:
            # Keep non-empty text only once while preserving order.
            if text and text not in seen:
                seen.add(text)
                content_parts.append(text)

        # Try to find main content containers
        for selector in content_selectors:
            content_elem = soup.find('div', selector) or soup.find('article', selector)
            if not content_elem:
                continue

            # Extract text from paragraphs
            for paragraph in content_elem.find_all('p'):
                add_text(paragraph.get_text(strip=True))

            # Extract text from sections
            sections = content_elem.find_all(
                ['section', 'div'],
                class_=lambda x: x and 'section' in x.lower()
            )
            for section in sections:
                add_text(section.get_text(strip=True))

        # If no content found with selectors, try to get all paragraphs
        if not content_parts:
            paragraph_texts = (p.get_text(strip=True) for p in soup.find_all('p'))
            content_parts = [text for text in paragraph_texts if len(text) > 50]

        return '\n\n'.join(content_parts) if content_parts else "Content not found"

    def _extract_metadata(self, soup: BeautifulSoup) -> Dict[str, Any]:
        """Extract metadata from the article.

        Returns:
            A dict with one entry per ``<meta>`` tag that has both a
            ``name``/``property`` and a ``content`` attribute, plus optional
            ``publication_date`` and ``author`` entries when matching
            elements are present.
        """
        metadata = {}

        # Extract meta tags
        for meta in soup.find_all('meta'):
            name = meta.get('name') or meta.get('property')
            content = meta.get('content')
            if name and content:
                metadata[name] = content

        # Extract publication date (first matching selector wins)
        date_selectors = [
            {'class': 'publish-date'},
            {'class': 'article-date'},
            {'itemprop': 'datePublished'},
        ]

        for selector in date_selectors:
            date_elem = soup.find(['time', 'span', 'div'], selector)
            if date_elem:
                metadata['publication_date'] = date_elem.get_text(strip=True)
                break

        # Extract author if available
        author_elem = soup.find(['span', 'div'], class_=lambda x: x and 'author' in x.lower())
        if author_elem:
            metadata['author'] = author_elem.get_text(strip=True)

        return metadata

    def _extract_tables(self, soup: BeautifulSoup) -> list:
        """Extract tables from the article.

        Returns:
            A list of dicts with ``table_id`` (1-based position among all
            ``<table>`` elements), ``headers`` (text of every ``<th>``) and
            ``rows`` (cell texts per ``<tr>``, header cells included). Tables
            without any row data are omitted.
        """
        tables_data = []

        for idx, table in enumerate(soup.find_all('table')):
            table_data = {
                'table_id': idx + 1,
                'headers': [th.get_text(strip=True) for th in table.find_all('th')],
                'rows': []
            }

            # Extract rows
            for row in table.find_all('tr'):
                cells = row.find_all(['td', 'th'])
                if cells:
                    table_data['rows'].append([cell.get_text(strip=True) for cell in cells])

            if table_data['rows']:  # Only add if there's data
                tables_data.append(table_data)

        return tables_data

    def _extract_statistics(self, soup: BeautifulSoup) -> Dict[str, Any]:
        """Extract key statistics and figures.

        Two heuristics are applied: ``label: value`` items inside known
        statistics containers, and digit-bearing elements whose class name
        hints at a metric, keyed by the text of the preceding element.

        Returns:
            Mapping of label text to value text (possibly empty).
        """
        statistics = {}

        # Look for statistics containers
        stat_selectors = [
            {'class': 'statistics'},
            {'class': 'key-stats'},
            {'class': 'industry-stats'},
            {'class': 'highlights'},
        ]

        for selector in stat_selectors:
            stat_container = soup.find('div', selector)
            if stat_container:
                # Extract key-value pairs
                for item in stat_container.find_all(['dt', 'dd', 'li']):
                    text = item.get_text(strip=True)
                    if ':' in text:
                        key, value = text.split(':', 1)
                        statistics[key.strip()] = value.strip()

        # Look for numeric data in spans or divs with specific classes
        numeric_elements = soup.find_all(['span', 'div'], class_=lambda x: x and any(
            term in x.lower() for term in ['stat', 'metric', 'value', 'number']
        ))

        for elem in numeric_elements:
            text = elem.get_text(strip=True)
            if text and any(char.isdigit() for char in text):
                # Try to find associated label
                label = elem.find_previous(['label', 'span', 'div'])
                if label:
                    label_text = label.get_text(strip=True)
                    if label_text and label_text != text:
                        statistics[label_text] = text

        return statistics

    async def _execute(self, **kwargs) -> Dict[str, Any]:
        """
        Execute IBISWorld search and content extraction.

        Args:
            query: Search query
            max_results: Maximum number of results (defaults to 5, matching
                the args schema, when omitted)
            extract_content: Whether to extract full content
            include_tables: Whether to extract tables

        Returns:
            Search results with extracted content. The mapping returned by the
            parent search is annotated with ``source``, ``domain`` and
            ``content_extracted``; when extraction is requested each result
            also gets ``extracted_content`` and ``has_content``.
        """
        query = kwargs['query']
        max_results = kwargs.get('max_results', 5)
        extract_content = kwargs.get('extract_content', True)
        include_tables = kwargs.get('include_tables', True)

        self.logger.info(f"Searching IBISWorld for: {query}")

        # Use parent class to perform site search
        search_kwargs = {
            'query': query,
            'site': self.IBISWORLD_DOMAIN,
            'max_results': max_results,
            'preview': False,  # We'll do our own content extraction
            'preview_method': 'aiohttp'
        }

        # Get search results from Google Site Search
        search_results = await super()._execute(**search_kwargs)

        # Extract content from each result if requested
        if extract_content:
            # Tolerate a response without a 'results' entry.
            results = search_results.get('results') or []
            self.logger.info(f"Extracting content from {len(results)} results")

            for result in results:
                url = result['link']
                self.logger.info(f"Extracting content from: {url}")

                content_data = await self._extract_article_content(
                    url,
                    include_tables=include_tables
                )

                # Add extracted content to result
                result['extracted_content'] = content_data
                result['has_content'] = content_data.get('content') is not None

        # Add IBISWorld-specific metadata to response
        search_results['source'] = 'IBISWorld'
        search_results['domain'] = self.IBISWORLD_DOMAIN
        search_results['content_extracted'] = extract_content

        return search_results
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
# Names exported by `from <module> import *`: the tool and its argument schema.
__all__ = ['IBISWorldTool', 'IBISWorldSearchArgs']
|