ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
from typing import List, Dict, Any
|
|
2
|
+
import re
|
|
3
|
+
from .base import BaseTextSplitter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MarkdownTextSplitter(BaseTextSplitter):
    """
    Text splitter that respects Markdown structure.

    The text is first cut into sections at ATX headers (``#`` .. ``######``)
    and at fenced code blocks (lines starting with three backticks). The
    sections are then packed greedily into chunks of at most ``chunk_size``
    characters, optionally repeating trailing sections of the previous
    chunk (up to ``chunk_overlap`` characters) at the start of the next one.

    Features:
    - Splits at markdown headers (the header level is recorded per section)
    - Keeps each fenced code block together as its own section
    - Oversized sections fall back to paragraph, then line, then
      whitespace/fixed-width splitting so no chunk exceeds ``chunk_size``

    Note: ``code_block_pattern``, ``list_pattern`` and ``table_pattern`` are
    compiled in ``__init__`` and exposed as attributes, but the splitting
    methods of this class do not consult them.
    """

    # Separator inserted between sections (and paragraphs) merged into one chunk.
    _JOINER = '\n\n'

    def __init__(
        self,
        chunk_size: int = 4000,
        chunk_overlap: int = 200,
        strip_headers: bool = False,
        return_each_line: bool = False,
        **kwargs
    ):
        """
        Initialize MarkdownTextSplitter.

        Args:
            chunk_size: Maximum size per chunk (in characters)
            chunk_overlap: Overlap between chunks (in characters)
            strip_headers: Whether to strip headers from chunks
            return_each_line: Whether to return each line as a separate chunk
            **kwargs: Forwarded unchanged to ``BaseTextSplitter.__init__``
        """
        super().__init__(chunk_size, chunk_overlap, **kwargs)
        self.strip_headers = strip_headers
        self.return_each_line = return_each_line

        # Markdown parsing patterns
        self.header_pattern = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)
        self.code_block_pattern = re.compile(r'```.*?```', re.DOTALL)
        self.list_pattern = re.compile(r'^[\s]*[-*+]\s+', re.MULTILINE)
        self.table_pattern = re.compile(r'^[\s]*\|.*\|[\s]*$', re.MULTILINE)

    def _count_tokens(self, text: str) -> int:
        """Approximate the token count as 1.3 tokens per whitespace-separated word."""
        word_count = len(text.split())
        return int(word_count * 1.3)  # Rough token approximation

    def split_text(self, text: str) -> List[str]:
        """
        Split markdown text respecting structure.

        Args:
            text: Markdown source to split.

        Returns:
            The list of chunks. Empty input yields ``[]``. With
            ``return_each_line`` set, every non-blank line is returned as
            its own chunk and no section merging takes place.
        """
        if not text:
            return []

        if self.return_each_line:
            return [line for line in text.split('\n') if line.strip()]

        # Identify sections first, then pack them into size-bounded chunks.
        sections = self._parse_markdown_sections(text)
        return self._merge_markdown_sections(sections)

    @staticmethod
    def _new_section(
        kind: str,
        start_line: int,
        level: int = 0,
        header: str = '',
        content: Any = None
    ) -> Dict[str, Any]:
        """Build one section record with the keys used throughout this class."""
        return {
            'type': kind,
            'level': level,
            'header': header,
            'content': [] if content is None else content,
            'start_line': start_line
        }

    def _parse_markdown_sections(self, text: str) -> List[Dict[str, Any]]:
        """
        Parse markdown into a flat list of sections.

        Each section is a dict with ``type`` (``'content'``, ``'section'`` or
        ``'code_block'``), ``level`` (header depth, 0 when not a header
        section), ``header``, ``content`` (list of lines) and ``start_line``
        (0-based index of the first line). Sections without any content
        lines are dropped.
        """
        sections: List[Dict[str, Any]] = []
        current = self._new_section('content', 0)
        in_code_block = False

        for index, line in enumerate(text.split('\n')):
            stripped = line.strip()

            # A fence line toggles code-block mode; headers inside a code
            # block must not start a new section.
            if stripped.startswith('```'):
                if in_code_block:
                    # Closing fence: finish the code block, resume plain content.
                    current['content'].append(line)
                    sections.append(current)
                    current = self._new_section('content', index + 1)
                else:
                    # Opening fence: flush what came before, start a code section.
                    if current['content']:
                        sections.append(current)
                    current = self._new_section(
                        'code_block',
                        index,
                        header=f'Code ({stripped[3:]})',
                        content=[line]
                    )
                in_code_block = not in_code_block
                continue

            if in_code_block:
                current['content'].append(line)
                continue

            header_match = self.header_pattern.match(line)
            if header_match is None:
                current['content'].append(line)
                continue

            # Header line: close the running section and open a new one.
            if current['content']:
                sections.append(current)
            current = self._new_section(
                'section',
                index,
                level=len(header_match.group(1)),
                header=header_match.group(2).strip(),
                content=[] if self.strip_headers else [line]
            )

        # Add final section (also covers an unterminated code block).
        if current['content']:
            sections.append(current)

        return sections

    def _joined_length(self, parts: List[str]) -> int:
        """Return ``len(self._JOINER.join(parts))`` without building the string."""
        if not parts:
            return 0
        return sum(len(part) for part in parts) + len(self._JOINER) * (len(parts) - 1)

    def _merge_markdown_sections(self, sections: List[Dict[str, Any]]) -> List[str]:
        """
        Merge sections into chunks respecting the chunk size limit.

        Sections are joined with a blank line. The separator is counted
        towards the chunk size, and overlap carried over from the previous
        chunk is dropped again (oldest part first) when it would push the
        new chunk over ``chunk_size``. Sections larger than ``chunk_size``
        are split on their own via ``_split_large_section``.

        Returns:
            Non-blank chunks in document order.
        """
        if not sections:
            return []

        joiner = self._JOINER
        chunks: List[str] = []
        parts: List[str] = []
        size = 0  # invariant: size == len(joiner.join(parts))

        for section in sections:
            section_text = '\n'.join(section['content'])
            section_size = len(section_text)

            # If section alone exceeds chunk size, split it
            if section_size > self.chunk_size:
                if parts:
                    chunks.append(joiner.join(parts))
                    parts = []
                    size = 0
                chunks.extend(self._split_large_section(section_text))
                continue

            grown = size + len(joiner) + section_size if parts else section_size

            if parts and grown > self.chunk_size:
                chunks.append(joiner.join(parts))

                # Start new chunk with overlap; overlap is best-effort and
                # must never make the new chunk exceed the size limit.
                parts = self._get_overlap_content(parts)
                parts.append(section_text)
                while len(parts) > 1 and self._joined_length(parts) > self.chunk_size:
                    parts.pop(0)
                size = self._joined_length(parts)
            else:
                parts.append(section_text)
                size = grown

        # Add final chunk
        if parts:
            chunks.append(joiner.join(parts))

        return [chunk for chunk in chunks if chunk.strip()]

    def _split_large_section(self, text: str) -> List[str]:
        """
        Split a section that exceeds the chunk size.

        The section is split into paragraphs (blank-line separated) which
        are packed greedily; the joining separator is counted towards the
        size. Only surrounding newlines are trimmed from paragraphs so
        leading indentation (nested lists, code) is preserved. A paragraph
        that is itself larger than ``chunk_size`` is split further by
        ``_split_oversized_paragraph``.
        """
        joiner = self._JOINER
        paragraphs = [p.strip('\n') for p in text.split('\n\n') if p.strip()]

        chunks: List[str] = []
        current: List[str] = []
        size = 0  # invariant: size == len(joiner.join(current))

        for paragraph in paragraphs:
            para_size = len(paragraph)

            # Non-positive chunk sizes cannot be honoured; skip hard splitting.
            if para_size > self.chunk_size > 0:
                if current:
                    chunks.append(joiner.join(current))
                    current = []
                    size = 0
                chunks.extend(self._split_oversized_paragraph(paragraph))
                continue

            grown = size + len(joiner) + para_size if current else para_size

            if current and grown > self.chunk_size:
                chunks.append(joiner.join(current))
                current = [paragraph]
                size = para_size
            else:
                current.append(paragraph)
                size = grown

        if current:
            chunks.append(joiner.join(current))

        return chunks

    def _split_oversized_paragraph(self, paragraph: str) -> List[str]:
        """Split one paragraph larger than ``chunk_size`` along line boundaries.

        Lines are packed greedily (joined with a single newline); a line
        that is itself too long is broken up by ``_split_long_line``.
        """
        limit = self.chunk_size
        if limit <= 0:
            return [paragraph]

        chunks: List[str] = []
        current: List[str] = []
        size = 0  # invariant: size == len('\n'.join(current))

        for line in paragraph.split('\n'):
            if len(line) > limit:
                if current:
                    chunks.append('\n'.join(current))
                    current = []
                    size = 0
                chunks.extend(self._split_long_line(line, limit))
                continue

            grown = size + 1 + len(line) if current else len(line)

            if current and grown > limit:
                chunks.append('\n'.join(current))
                current = [line]
                size = len(line)
            else:
                current.append(line)
                size = grown

        if current:
            chunks.append('\n'.join(current))

        return [chunk for chunk in chunks if chunk.strip()]

    @staticmethod
    def _split_long_line(line: str, limit: int) -> List[str]:
        """Break a single line into pieces of at most ``limit`` characters.

        Breaks at the last space inside each window when there is one (the
        space itself is dropped), otherwise cuts at exactly ``limit``.
        ``limit`` must be positive.
        """
        pieces: List[str] = []
        start = 0
        end = len(line)

        while end - start > limit:
            # Search (start, start + limit] so every piece is non-empty.
            cut = line.rfind(' ', start + 1, start + limit + 1)
            if cut == -1:
                cut = start + limit
                next_start = cut
            else:
                next_start = cut + 1
            pieces.append(line[start:cut])
            start = next_start

        if start < end:
            pieces.append(line[start:])

        return pieces

    def _get_overlap_content(self, parts: List[str]) -> List[str]:
        """
        Get content for overlap between chunks.

        Walks ``parts`` from the end and keeps whole parts while their
        combined length stays within ``chunk_overlap``; stops at the first
        part that does not fit. The kept parts are returned in their
        original order as a new list.
        """
        if not parts or self.chunk_overlap == 0:
            return []

        kept: List[str] = []
        budget = self.chunk_overlap

        for part in reversed(parts):
            if len(part) > budget:
                break
            kept.append(part)
            budget -= len(part)

        kept.reverse()
        return kept
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from typing import List, Callable, Optional, Any, Union
|
|
2
|
+
from .base import BaseTextSplitter
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TokenTextSplitter(BaseTextSplitter):
|
|
6
|
+
"""
|
|
7
|
+
Text splitter that splits based on token count using various tokenizers.
|
|
8
|
+
|
|
9
|
+
Supports:
|
|
10
|
+
- OpenAI tiktoken tokenizers (gpt-3.5-turbo, gpt-4, etc.)
|
|
11
|
+
- Hugging Face transformers tokenizers
|
|
12
|
+
- Custom tokenization functions
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
def __init__(
    self,
    chunk_size: int = 4000,
    chunk_overlap: int = 200,
    model_name: str = "gpt-3.5-turbo",
    encoding_name: Optional[str] = None,
    tokenizer: Optional[Any] = None,
    tokenizer_function: Optional[Callable[[str], List[str]]] = None,
    allowed_special: Union[str, set] = "all",
    disallowed_special: Union[str, set, List] = (),
    **kwargs
):
    """
    Initialize TokenTextSplitter.

    The tokenizer backend is chosen in this order: ``tokenizer_function``
    if given, else ``tokenizer`` if given, else tiktoken (imported lazily).

    Args:
        chunk_size: Maximum number of tokens per chunk
        chunk_overlap: Number of tokens to overlap between chunks
        model_name: Name of the model for tiktoken (e.g., 'gpt-3.5-turbo', 'gpt-4')
        encoding_name: Specific encoding name for tiktoken (e.g., 'cl100k_base');
            takes precedence over ``model_name`` when non-empty
        tokenizer: Custom tokenizer object (Hugging Face or other) exposing
            ``tokenize``, ``encode`` and ``decode``
        tokenizer_function: Custom function that takes text and returns tokens;
            decoding joins the tokens with single spaces
        allowed_special: Special tokens allowed during encoding
        disallowed_special: Special tokens that raise errors during encoding
        **kwargs: Forwarded unchanged to ``BaseTextSplitter.__init__``

    Raises:
        ValueError: If the tiktoken encoding cannot be loaded.
    """
    super().__init__(chunk_size, chunk_overlap, **kwargs)

    self.model_name = model_name
    self.encoding_name = encoding_name
    self.allowed_special = allowed_special
    self.disallowed_special = disallowed_special

    # Select the backend with explicit None checks: tokenizer objects may
    # define __len__/__bool__ and must not be skipped for being "falsy".
    if tokenizer_function is not None:
        self._tokenize = tokenizer_function
        self._encode = tokenizer_function
        self._decode = " ".join
    elif tokenizer is not None:
        # Hugging Face tokenizer
        self._tokenizer = tokenizer
        self._tokenize = self._hf_tokenize
        self._encode = self._hf_encode
        self._decode = self._hf_decode
    else:
        # Use tiktoken
        import tiktoken
        try:
            if encoding_name:
                self._enc = tiktoken.get_encoding(encoding_name)
            else:
                self._enc = tiktoken.encoding_for_model(model_name)
        except Exception as e:
            # Keep the ValueError contract but preserve the original cause.
            raise ValueError(f"Could not load tokenizer for model {model_name}: {e}") from e

        self._tokenize = self._tiktoken_tokenize
        self._encode = self._tiktoken_encode
        self._decode = self._tiktoken_decode
|
|
72
|
+
|
|
73
|
+
def _tiktoken_tokenize(self, text: str) -> List[str]:
|
|
74
|
+
"""Tokenize using tiktoken and return string tokens"""
|
|
75
|
+
tokens = self._enc.encode(
|
|
76
|
+
text,
|
|
77
|
+
allowed_special=self.allowed_special,
|
|
78
|
+
disallowed_special=self.disallowed_special
|
|
79
|
+
)
|
|
80
|
+
return [self._enc.decode([token]) for token in tokens]
|
|
81
|
+
|
|
82
|
+
def _tiktoken_encode(self, text: str) -> List[int]:
|
|
83
|
+
"""Encode text to token IDs using tiktoken"""
|
|
84
|
+
return self._enc.encode(
|
|
85
|
+
text,
|
|
86
|
+
allowed_special=self.allowed_special,
|
|
87
|
+
disallowed_special=self.disallowed_special
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
def _tiktoken_decode(self, tokens: List[int]) -> str:
|
|
91
|
+
"""Decode token IDs to text using tiktoken"""
|
|
92
|
+
return self._enc.decode(tokens)
|
|
93
|
+
|
|
94
|
+
def _hf_tokenize(self, text: str) -> List[str]:
|
|
95
|
+
"""Tokenize using Hugging Face tokenizer"""
|
|
96
|
+
return self._tokenizer.tokenize(text)
|
|
97
|
+
|
|
98
|
+
def _hf_encode(self, text: str) -> List[int]:
|
|
99
|
+
"""Encode text using Hugging Face tokenizer"""
|
|
100
|
+
return self._tokenizer.encode(text, add_special_tokens=False)
|
|
101
|
+
|
|
102
|
+
def _hf_decode(self, tokens: List[int]) -> str:
|
|
103
|
+
"""Decode tokens using Hugging Face tokenizer"""
|
|
104
|
+
return self._tokenizer.decode(tokens, skip_special_tokens=True)
|
|
105
|
+
|
|
106
|
+
def _count_tokens(self, text: str) -> int:
|
|
107
|
+
"""Count tokens in text"""
|
|
108
|
+
return len(self._encode(text))
|
|
109
|
+
|
|
110
|
+
def split_text(self, text: str) -> List[str]:
    """
    Split text based on token count.

    The text is encoded once with the configured encoder, then cut into
    windows of ``chunk_size`` tokens. Consecutive windows share
    ``chunk_overlap`` tokens, and each window is decoded back to text.

    Args:
        text: The text to split.

    Returns:
        An empty list for empty input, ``[text]`` unchanged when it already
        fits in one chunk, otherwise the decoded chunks in order.

    Raises:
        ValueError: If the text needs more than one chunk and
            ``chunk_overlap`` is not smaller than ``chunk_size``.
    """
    if not text:
        return []

    # Encode the entire text
    tokens = self._encode(text)
    total = len(tokens)

    if total <= self.chunk_size:
        return [text]

    # Each window starts this many tokens after the previous one. A stride
    # of zero or less would never advance and the loop would not terminate.
    stride = self.chunk_size - self.chunk_overlap
    if stride <= 0:
        raise ValueError(
            f"chunk_overlap ({self.chunk_overlap}) must be smaller than "
            f"chunk_size ({self.chunk_size})"
        )

    chunks = []
    for start_idx in range(0, total, stride):
        end_idx = start_idx + self.chunk_size

        # Slicing clamps at the end of the token list, so the last window
        # may be shorter than chunk_size.
        chunks.append(self._decode(tokens[start_idx:end_idx]))

        # Stop once a window reaches the end, so no trailing chunk is
        # emitted that contains only overlap tokens.
        if end_idx >= total:
            break

    return chunks
|
parrot/loaders/txt.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from .abstract import AbstractLoader
|
|
4
|
+
from .files.text import TextFile
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TextLoader(AbstractLoader):
    """
    Loader for plain-text and Markdown style files.
    """
    extensions: List[str] = ['.txt', '.text', '.md', '.markdown', '.rd']

    async def _load(self, path: Path, **kwargs) -> list:
        """
        Read one text file and wrap its content in a document.

        Args:
            path (Path): Location of the text file to read.

        Returns:
            list: Whatever ``self.create_document`` produces for the file
                content and path; an empty list if control leaves the
                ``TextFile`` context without returning.
        """
        async with TextFile(path) as handle:
            text = await handle.read()
            return self.create_document(text, path)
        # Only reached if the context manager exits without the return above.
        return []
|
parrot/loaders/video.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import subprocess
|
|
5
|
+
from ..stores.models import Document
|
|
6
|
+
from .basevideo import BaseVideoLoader
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class VideoLoader(BaseVideoLoader):
    """
    Generating Video transcripts from URL Videos.

    ``download_video`` shells out to the ``yt-dlp`` command-line tool.
    Subclasses must implement ``load_video``.
    """
    def download_video(self, url: str, path: str) -> Path:
        """
        Downloads a video from a URL using yt-dlp.

        yt-dlp is first asked for the file name it would produce; the
        download is skipped when that file already exists under ``path``.

        Args:
            url (str): The URL of the video to download.
            path (str): The directory where the video will be saved. A
                ``str`` or ``Path`` is accepted.

        Returns:
            Path: Location of the existing or newly downloaded file.
                ``None`` is returned (after printing the error) when the
                download command itself fails.

        Raises:
            ValueError: If yt-dlp cannot resolve a file name for ``url``.
        """
        # The body relies on Path operations ("/" and joinpath), so accept
        # plain strings as the annotation advertises.
        path = Path(path)
        output_template = str(path / "%(title)s.%(ext)s")

        try:
            result = subprocess.run(
                ["yt-dlp", "--get-filename", "-o", output_template, url],
                check=True,
                stdout=subprocess.PIPE,
                text=True,
            )
        except Exception:
            # Retry without an output template before giving up.
            # NOTE(review): the name resolved here comes from yt-dlp's
            # default template, while the download below still writes with
            # ``output_template``, so the returned path may not match the
            # downloaded file in this fallback case — confirm intent.
            try:
                result = subprocess.run(
                    ["yt-dlp", "--get-filename", url],
                    check=True,
                    stdout=subprocess.PIPE,
                    text=True,
                )
            except Exception as e:
                raise ValueError(
                    f"Unable to Download Video: {e}"
                ) from e

        filename = result.stdout.strip()  # Remove any trailing newline characters
        print('FILENAME > ', filename)
        file_path = path.joinpath(filename)
        if file_path.exists():
            print(f"Video already downloaded: {filename}")
            return file_path

        print(f"Downloading video: {filename}")
        # after extracted filename, download the video
        try:
            subprocess.run(
                ["yt-dlp", url, "-o", output_template],
                check=True,
            )
        except subprocess.CalledProcessError as e:
            # Best-effort: report the failure and signal it with None.
            print(f"Error downloading video: {e}")
            return None
        return file_path

    async def _load(self, source: str, **kwargs) -> List[Document]:
        """
        Load the documents for one video source.

        Args:
            source: Either the video reference itself, or a dict whose
                first key maps to a title string or to a dict holding a
                ``'title'`` entry.

        Returns:
            List[Document]: The items produced by ``load_video``.
        """
        transcript = None
        video_title = source
        if isinstance(source, dict):
            path = list(source.keys())[0]
            parts = source[path]
            if isinstance(parts, str):
                video_title = parts
            elif isinstance(parts, dict):
                video_title = parts['title']
        # NOTE(review): for the dict form the whole dict, not its first key,
        # is handed to load_video as the url — confirm against subclasses.
        docs = await self.load_video(source, video_title, transcript)
        return list(docs)

    @abstractmethod
    async def load_video(self, url: str, video_title: str, transcript: str) -> list:
        """Produce the documents for one video; implemented by subclasses."""
        pass

    def parse(self, source):
        """No-op placeholder; does nothing and returns ``None``."""
        pass
|