ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import subprocess
|
|
3
|
+
from typing import Optional, Union, List
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import re
|
|
7
|
+
import json
|
|
8
|
+
import aiofiles
|
|
9
|
+
import yt_dlp
|
|
10
|
+
try:
|
|
11
|
+
from pytube import YouTube # optional, best-effort only
|
|
12
|
+
except Exception:
|
|
13
|
+
YouTube = None # type: ignore
|
|
14
|
+
from ..stores.models import Document
|
|
15
|
+
from .video import VideoLoader
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# A YouTube video ID is exactly 11 URL-safe characters introduced by "v=" or
# "/".  The trailing negative lookahead rejects longer tokens (channel IDs,
# hyphenated host names, long path segments) whose first 11 characters would
# otherwise be mistaken for a video ID.
_YT_ID_RE = re.compile(r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])")


def extract_video_id(url: str) -> Optional[str]:
    """
    Extract the 11-character video ID from a YouTube URL.

    Args:
        url (str): Any URL form carrying the ID after ``v=`` or ``/``
            (``watch?v=<id>``, ``youtu.be/<id>``, ``/embed/<id>``, ...).

    Returns:
        Optional[str]: The first matching ID, or ``None`` when *url* is not a
        string or contains no stand-alone 11-character ID.
    """
    if not isinstance(url, str):
        return None
    m = _YT_ID_RE.search(url)
    return m.group(1) if m else None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Raise the threshold of chatty third-party loggers to WARNING.
for _noisy_logger in ("yt_dlp", "h5py._conv", "tensorflow"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
|
|
29
|
+
|
|
30
|
+
class YoutubeLoader(VideoLoader):
|
|
31
|
+
"""
|
|
32
|
+
Loader for Youtube videos.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
def _ensure_video_dir(self, path: Optional[Union[str, Path]]) -> Path:
|
|
36
|
+
"""
|
|
37
|
+
Normalize/ensure a usable download directory.
|
|
38
|
+
Priority: explicit arg > self._video_path > ./videos
|
|
39
|
+
"""
|
|
40
|
+
if isinstance(path, (str, Path)) and path:
|
|
41
|
+
p = Path(path)
|
|
42
|
+
else:
|
|
43
|
+
default = getattr(self, "_video_path", None)
|
|
44
|
+
if isinstance(default, (str, Path)) and default:
|
|
45
|
+
p = Path(default)
|
|
46
|
+
else:
|
|
47
|
+
p = Path.cwd() / "videos"
|
|
48
|
+
self._video_path = p
|
|
49
|
+
p.mkdir(parents=True, exist_ok=True)
|
|
50
|
+
self._video_path = p
|
|
51
|
+
return p
|
|
52
|
+
|
|
53
|
+
def get_video_info(self, url: str) -> dict:
    """
    Collect basic metadata for a video without downloading it.

    Three sources are tried in order: yt-dlp, then pytube (only when it
    could be imported), then a placeholder record built from the URL alone.
    A failure in either library is logged and the next source is used.

    Args:
        url (str): Video URL.

    Returns:
        dict: Keys ``url``, ``video_id``, ``watch_url``, ``embed_url``,
        ``title``, ``description``, ``view_count``, ``publish_date`` and
        ``author``. Missing text values are ``"Unknown"`` (``"unknown"``
        for ``video_id``) and a missing view count is ``0``.
    """
    # --- Source 1: yt-dlp, metadata only -------------------------------
    try:
        options = {"quiet": True, "noprogress": True, "skip_download": True}
        with yt_dlp.YoutubeDL(options) as extractor:
            meta = extractor.extract_info(url, download=False)

        raw_date = meta.get("upload_date")  # YYYYMMDD
        if raw_date:
            published = f"{raw_date[:4]}-{raw_date[4:6]}-{raw_date[6:8]} 00:00:00"
        else:
            published = "Unknown"

        video_id = meta.get("id") or extract_video_id(url) or "unknown"
        if video_id != "unknown":
            embed = f"https://www.youtube.com/embed/{video_id}"
        else:
            embed = url

        return {
            "url": url,
            "video_id": video_id,
            "watch_url": meta.get("webpage_url") or url,
            "embed_url": embed,
            "title": meta.get("title") or "Unknown",
            "description": meta.get("description") or "Unknown",
            "view_count": meta.get("view_count") or 0,
            "publish_date": published,
            "author": meta.get("uploader") or meta.get("channel") or "Unknown",
        }
    except Exception as e:
        self.logger.error(f"yt-dlp metadata failed for {url}: {e}")

    # --- Source 2: pytube, only if the optional import succeeded --------
    if YouTube:
        try:
            clip = YouTube(url)
            return {
                "url": url,
                "video_id": clip.video_id or extract_video_id(url) or "unknown",
                "watch_url": clip.watch_url or url,
                "embed_url": clip.embed_url or url,
                "title": clip.title or "Unknown",
                "description": clip.description or "Unknown",
                "view_count": clip.views or 0,
                "publish_date": (
                    clip.publish_date.strftime("%Y-%m-%d %H:%M:%S")
                    if clip.publish_date else "Unknown"
                ),
                "author": clip.author or "Unknown",
            }
        except Exception as e2:
            self.logger.error(f"pytube fallback failed for {url}: {e2}")

    # --- Source 3: placeholder built from the URL ------------------------
    fallback_id = extract_video_id(url) or "unknown"
    if fallback_id != "unknown":
        fallback_embed = f"https://www.youtube.com/embed/{fallback_id}"
    else:
        fallback_embed = url
    return {
        "url": url,
        "video_id": fallback_id,
        "watch_url": url,
        "embed_url": fallback_embed,
        "title": "Unknown",
        "description": "Unknown",
        "view_count": 0,
        "publish_date": "Unknown",
        "author": "Unknown",
    }
|
|
110
|
+
|
|
111
|
+
def download_audio_wav(self, url: str, path: Optional[Union[str, Path]] = None) -> Path:
    """
    Download the best audio stream and convert it to WAV (16 kHz mono).

    The conversion is done by yt-dlp's ``FFmpegExtractAudio``
    post-processor, so ffmpeg must be available to yt-dlp.

    Args:
        url (str): Video URL. Only the single video is fetched, even when
            the URL also references a playlist.
        path: Target directory; resolved through ``_ensure_video_dir``.

    Returns:
        Path: The resulting ``.wav`` file.

    Raises:
        ValueError: If no WAV file can be found after the download.
    """
    out_dir = self._ensure_video_dir(path)
    ydl_opts = {
        # Same single-video behaviour as download_video: without this a
        # "watch?v=...&list=..." URL would fetch every playlist entry and
        # the returned path would not identify the requested video.
        "noplaylist": True,
        "format": "bestaudio/best",
        "outtmpl": str(out_dir / "%(title)s.%(ext)s"),
        "quiet": True,
        "noprogress": True,
        "postprocessors": [
            {"key": "FFmpegExtractAudio", "preferredcodec": "wav", "preferredquality": "0"},
        ],
        # enforce mono 16k
        "postprocessor_args": ["-ac", "1", "-ar", "16000"],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # The post-processor keeps the base name and swaps the extension.
        wav_path = Path(ydl.prepare_filename(info)).with_suffix(".wav")

    if wav_path.exists():
        return wav_path

    # Expected name not found: fall back to the most recently written WAV
    # in the target directory.
    newest = max(out_dir.glob("*.wav"), key=lambda p: p.stat().st_mtime, default=None)
    if newest is None or not newest.exists():
        raise ValueError("WAV file not produced by yt-dlp/ffmpeg")
    return newest
|
|
139
|
+
|
|
140
|
+
def download_video(self, url: str, path: Path) -> Path:
    """
    Download a single video (720p or lower when available) with yt-dlp.

    The yt-dlp Python API performs the download. If the file name it
    reports is not on disk afterwards, the target directory is searched
    for a file with the same title under another extension; only when that
    also fails is the ``yt-dlp`` command-line tool run as a last resort.

    Args:
        url (str): The URL of the video to download.
        path (Path): The directory where the video will be saved; resolved
            (and created) through ``_ensure_video_dir``.

    Returns:
        Path: Location of the downloaded file.

    Raises:
        ValueError: On timeouts, a failing or missing ``yt-dlp``
            executable, a missing output file, or any other error in the
            second stage. Exceptions raised by the yt-dlp Python API in
            the first stage are not converted and propagate unchanged.
    """
    def _newest_with_stem(directory: Path, stem: str) -> Optional[Path]:
        # Plain prefix comparison rather than glob(): video titles often
        # contain "[", "]", "*" or "?", which glob would treat as pattern
        # syntax. Unfinished yt-dlp fragments are ignored.
        prefix = f"{stem}."
        matches = [
            entry for entry in directory.iterdir()
            if entry.is_file()
            and entry.name.startswith(prefix)
            and entry.suffix not in (".part", ".ytdl")
        ]
        return max(matches, key=lambda entry: entry.stat().st_mtime, default=None)

    try:
        self.logger.debug(f"Starting video download for: {url}")
        # _ensure_video_dir already creates the directory.
        path = self._ensure_video_dir(path)
        self.logger.debug(f"Download path: {path}")

        ydl_opts = {
            "noplaylist": True,
            "format": "bv*[height<=720]+ba/b[height<=720]/b",
            "outtmpl": str(path / "%(title)s.%(ext)s"),
            "merge_output_format": "mp4",  # or "mkv"
            "quiet": True,
            "noprogress": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            file_path = Path(ydl.prepare_filename(info))

    except subprocess.TimeoutExpired:
        self.logger.error("Timeout getting filename from yt-dlp")
        raise ValueError("Timeout getting video filename")
    except subprocess.CalledProcessError as e:
        self.logger.error(f"yt-dlp get-filename failed: {e}")
        self.logger.error(f"yt-dlp stderr: {e.stderr}")
        # Stop here: continuing would hit an unbound ``file_path`` and
        # report a misleading "unexpected error" instead.
        raise ValueError(f"Unable to Download Video: {e}") from e
    except FileNotFoundError:
        raise ValueError("yt-dlp not found on PATH. Please install yt-dlp.")

    try:
        # Already downloaded?
        if file_path.exists():
            self.logger.info(f"Video already downloaded: {file_path.name}")
            return file_path

        # The container may have ended up with a different extension;
        # look for it before paying for a second download.
        relocated = _newest_with_stem(path, file_path.stem)
        if relocated is not None:
            self.logger.info(f"Successfully downloaded video: {relocated}")
            return relocated

        self.logger.info(f"Downloading video: {file_path.name}")

        dl_cmd = [
            "yt-dlp",
            "--no-playlist",
            "--format", "best[height<=720]/best",  # prefer <=720p; fallback best
            "-o", "%(title)s.%(ext)s",
            "-P", str(path),
            url
        ]
        self.logger.debug(f"Download command: {' '.join(dl_cmd)}")
        subprocess.run(
            dl_cmd,
            check=True,
            timeout=600,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        if not file_path.exists():
            # Pick the newest file that shares the expected title.
            latest = _newest_with_stem(path, file_path.stem)
            if not latest:
                raise ValueError(f"Downloaded file not found: {file_path}")
            file_path = latest

        self.logger.info(f"Successfully downloaded video: {file_path}")
        return file_path

    except subprocess.TimeoutExpired:
        self.logger.error("Timeout downloading video with yt-dlp")
        raise ValueError("Timeout downloading video")
    except subprocess.CalledProcessError as e:
        self.logger.error(f"Error downloading video with yt-dlp: {e}")
        raise ValueError(f"Unable to Download Video: {e}") from e
    except Exception as e:
        self.logger.error(f"Unexpected error in download_video: {e}")
        raise ValueError(f"Unexpected error downloading video: {e}") from e
|
|
228
|
+
|
|
229
|
+
async def save_file_async(self, file_path: Path, content: Union[str, bytes]) -> None:
    """
    Write *content* to *file_path* asynchronously via aiofiles.

    ``bytes`` are written in binary mode; anything else is written in
    text mode using UTF-8.
    """
    is_binary = isinstance(content, bytes)
    open_mode = 'wb' if is_binary else 'w'
    text_encoding = None if is_binary else 'utf-8'

    async with aiofiles.open(str(file_path), mode=open_mode, encoding=text_encoding) as handle:
        await handle.write(content)
|
|
236
|
+
|
|
237
|
+
async def read_file_async(self, file_path: Path) -> str:
    """Read *file_path* asynchronously as UTF-8 text and return its contents."""
    async with aiofiles.open(str(file_path), 'r', encoding='utf-8') as handle:
        text = await handle.read()
        return text
|
|
241
|
+
|
|
242
|
+
async def load_video(
    self,
    url: str,
    video_title: str,
    transcript: Optional[Union[str, Path]] = None
) -> List[Document]:
    """
    Async method to load a video and create documents from its transcript.

    When ``transcript`` is None the audio is obtained (full video download
    followed by ``extract_audio`` when ``self._download_video`` is set,
    otherwise ``download_audio_wav``), transcribed with
    ``get_whisper_transcript`` and summarized. The result is, in order:
    the full-transcript document, a VTT document (only when
    ``transcript_to_vtt`` returns content) and one document per block
    returned by ``transcript_to_blocks``. Any exception on this path is
    logged and the work is delegated to ``_fallback_processing``.

    When ``transcript`` is given, a ``str`` or ``Path`` is treated as the
    path of a transcript file and read through ``read_file_async``; any
    other value is used as the transcript content itself.

    Args:
        url: Video URL; also stored as ``url``/``source`` in the metadata.
        video_title: Title stored in the metadata. On the transcription
            path it falls back to the probed title and then to ``url``.
        transcript: Optional path to an existing transcript file.

    Returns:
        The generated documents, or an empty list when a supplied
        transcript has no content.
    """
    # Get video metadata
    video_info = self.get_video_info(url)

    if transcript is None:
        try:
            loop = asyncio.get_running_loop()

            if self._download_video:
                # Download the video, then extract its audio track
                file_path = await loop.run_in_executor(
                    None, self.download_video, url, self._video_path
                )
                audio_path = file_path.with_suffix('.wav')
                await loop.run_in_executor(
                    None, self.extract_audio, file_path, audio_path
                )
            else:
                # Download bestaudio → WAV (16k mono)
                audio_path = await loop.run_in_executor(
                    None, self.download_audio_wav, url, self._video_path
                )

            transcript_path = audio_path.with_suffix('.vtt')

            # Get transcript using Whisper
            transcript_whisper = await loop.run_in_executor(
                None, self.get_whisper_transcript, audio_path
            )
            if not transcript_whisper or not transcript_whisper.get('text'):
                raise ValueError("Transcription failed or empty")

            transcript_text = transcript_whisper['text']

            # Generate summary (best effort: a failure only empties the summary)
            try:
                summary = await self.summary_from_text(transcript_text)
            except Exception as e:
                self.logger.warning(f"Error summarizing transcript for {url}: {e}")
                summary = ''

            # Metadata shared by every document of this video
            title = video_title or video_info.get("title") or url
            document_meta = {
                "language": self._language,
                "title": title,
                "docinfo": video_info,
            }
            if self.topics:
                document_meta['topic_tags'] = self.topics

            base_metadata = {
                "url": url,
                "source": url,
                "filename": title,
                "question": '',
                "answer": '',
                "source_type": self._source_type,
                "type": "video_transcript",
                "summary": f"{summary!s}",
                "document_meta": document_meta,
            }

            def _fresh_metadata(extra: Optional[dict] = None) -> dict:
                # dict.copy() is shallow: every document must get its own
                # "document_meta" dict, otherwise per-chunk start/end/id
                # values overwrite each other across all documents.
                return {
                    **base_metadata,
                    "document_meta": {**document_meta, **(extra or {})},
                }

            # Create main transcript document
            documents = [
                Document(
                    page_content=transcript_text,
                    metadata=_fresh_metadata()
                )
            ]

            # Create VTT document
            vtt_content = self.transcript_to_vtt(transcript_whisper, transcript_path)
            if vtt_content:
                documents.append(
                    Document(
                        page_content=vtt_content,
                        metadata=_fresh_metadata()
                    )
                )

            # Create individual dialog chunk documents
            for chunk in self.transcript_to_blocks(transcript_whisper):
                documents.append(
                    Document(
                        page_content=chunk['text'],
                        metadata=_fresh_metadata({
                            "start": f"{chunk['start_time']}",
                            "end": f"{chunk['end_time']}",
                            "id": f"{chunk['id']}",
                        })
                    )
                )

            return documents

        except Exception as e:
            self.logger.warning(f"Error processing video {url}: {e}")
            # Fallback to basic processing without chunks
            return await self._fallback_processing(url, video_info)

    # Load transcript from file
    if isinstance(transcript, (str, Path)):
        transcript_content = await self.read_file_async(Path(transcript))
    else:
        transcript_content = transcript

    if not transcript_content:
        return []

    try:
        summary = await self.summary_from_text(transcript_content)
    except Exception as e:
        self.logger.warning(f"Error summarizing transcript for {url}: {e}")
        summary = ''

    metadata = {
        "source": url,
        "url": url,
        "filename": video_title,
        "question": '',
        "answer": '',
        "source_type": self._source_type,
        'type': 'video_transcript',
        'summary': f"{summary!s}",
        "document_meta": {
            "language": self._language,
            "title": video_title
        },
    }

    if self.topics:
        metadata['document_meta']['topic_tags'] = self.topics

    return [
        Document(
            page_content=transcript_content,
            metadata=metadata
        )
    ]
|
|
391
|
+
|
|
392
|
+
async def _fallback_processing(self, url: str, video_info: dict) -> List[Document]:
    """
    Simplified processing path: a single transcript document, no chunks.

    Downloads the audio with ``download_audio_wav``, transcribes it with
    ``get_whisper_transcript`` and wraps the text in one ``Document``.
    This method never raises: every failure is logged and reported as an
    empty list.

    Args:
        url: Video URL; stored as ``source``/``url`` in the metadata.
        video_info: Video metadata. A copy of it becomes ``document_meta``
            so that the caller's dict is never modified.

    Returns:
        A one-element list with the transcript document, or an empty list
        when the transcription is missing/empty or any step fails.
    """
    try:
        loop = asyncio.get_running_loop()
        audio_path = await loop.run_in_executor(
            None, self.download_audio_wav, url, self._video_path
        )
        transcript_result = await loop.run_in_executor(
            None, self.get_whisper_transcript, audio_path
        )
        # Same validation as the primary path: a result without text is a failure
        transcript_text = transcript_result.get('text') if transcript_result else None
        if not transcript_text:
            self.logger.warning(f"Unable to load Youtube Video {url}")
            return []

        # Summary is best effort: a failure only empties the summary
        try:
            summary = await self.summary_from_text(transcript_text)
        except Exception as e:
            self.logger.warning(f"Error summarizing transcript for {url}: {e}")
            summary = ''

        # Work on a copy so adding topic tags does not mutate the caller's dict
        document_meta = dict(video_info or {})
        metadata = {
            "source": url,
            "url": url,
            "source_type": self._source_type,
            "summary": f"{summary!s}",
            "filename": document_meta.get('title', 'Unknown'),
            "question": '',
            "answer": '',
            "type": "video_transcript",
            "document_meta": document_meta,
        }
        if self.topics:
            document_meta['topic_tags'] = self.topics

        return [Document(page_content=transcript_text, metadata=metadata)]
    except Exception as e:
        self.logger.error(f"Fallback processing failed for {url}: {e}")
        return []
|
|
427
|
+
|
|
428
|
+
async def extract_video(self, url: str) -> dict:
    """
    Extract one video and return metadata with the generated file paths.

    The video is downloaded, its audio extracted and transcribed, and the
    transcript (``.txt``) and summary (``.summary``) are saved next to the
    downloaded file. All blocking steps (download, audio extraction,
    transcription) run in the default executor so that several extractions
    can overlap.

    Args:
        url: Video URL.

    Returns:
        A dict with ``url``, ``source``, ``source_type``, ``type``,
        ``summary``, ``video_info`` and the paths ``transcript``,
        ``summary_file``, ``vtt``, ``audio`` and ``video``.

    Raises:
        ValueError: If the transcription step returns no result or no text
            entry. Errors from the download/extraction helpers propagate.
    """
    loop = asyncio.get_running_loop()

    # Get video metadata
    # NOTE(review): get_video_info is called synchronously here, as in
    # load_video; confirm whether it blocks on network I/O.
    video_info = self.get_video_info(url)

    # Download video in a worker thread: the downloader blocks until the
    # download finishes and must not stall the event loop
    file_path = await loop.run_in_executor(
        None, self.download_video, url, self._video_path
    )
    audio_path = file_path.with_suffix('.wav')
    transcript_path = file_path.with_suffix('.txt')
    vtt_path = file_path.with_suffix('.vtt')
    summary_path = file_path.with_suffix('.summary')

    # Extract audio
    await loop.run_in_executor(
        None, self.extract_audio, file_path, audio_path
    )

    # Get transcript
    transcript_whisper = await loop.run_in_executor(
        None, self.get_whisper_transcript, audio_path
    )
    if not transcript_whisper or transcript_whisper.get('text') is None:
        raise ValueError(f"Transcription failed for {url}")
    transcript_text = transcript_whisper['text']

    # Generate summary (best effort: a failure only empties the summary)
    try:
        summary = await self.summary_from_text(transcript_text)
        await self.save_file_async(summary_path, summary.encode('utf-8'))
    except Exception as e:
        self.logger.warning(f"Error generating summary for {url}: {e}")
        summary = ''

    # Create VTT format; the returned content is not needed here
    # (presumably the helper also writes vtt_path — confirm)
    self.transcript_to_vtt(transcript_whisper, vtt_path)

    # Save transcript
    await self.save_file_async(transcript_path, transcript_text.encode('utf-8'))

    # Create metadata
    return {
        "url": f"{url}",
        "source": f"{url}",
        "source_type": self._source_type,
        'type': 'video_transcript',
        "summary": f"{summary!s}",
        "video_info": video_info,
        "transcript": transcript_path,
        "summary_file": summary_path,
        "vtt": vtt_path,
        "audio": audio_path,
        "video": file_path
    }
|
|
482
|
+
|
|
483
|
+
async def extract(self) -> List[dict]:
    """
    Extract all videos in ``self.urls`` and return their metadata.

    One ``extract_video`` call is started per URL and all of them are
    awaited together. A failed or cancelled extraction is logged together
    with its URL and left out of the result; it never aborts the others.

    Returns:
        The metadata dicts of the successful extractions, in URL order.
    """
    # Materialize once so results can be paired with their URLs afterwards
    urls = list(self.urls)

    # Run all extractions concurrently
    results = await asyncio.gather(
        *(self.extract_video(url) for url in urls),
        return_exceptions=True
    )

    documents = []
    for url, result in zip(urls, results):
        # BaseException rather than Exception: a cancelled extraction is
        # returned as asyncio.CancelledError, which is not an Exception
        if isinstance(result, BaseException):
            self.logger.error(f"Error extracting video {url}: {result}")
        else:
            documents.append(result)

    return documents
|