ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
parrot/interfaces/sharepoint.py
@@ -0,0 +1,1435 @@
import os
import re
import asyncio
from typing import List, Optional, Union, Dict, Any
import contextlib
from pathlib import Path, PurePath
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse, quote, unquote
import aiofiles
from tqdm import tqdm
import httpx
import aiohttp
# Microsoft Graph SDK imports (replacing office365-rest-python-client)
from msgraph.generated.models.subscription import Subscription
from msgraph.generated.models.drive_item import DriveItem
from msgraph.generated.models.folder import Folder
from msgraph.generated.models.file import File
from msgraph.generated.models.upload_session import UploadSession
from msgraph.generated.drives.item.items.item.create_upload_session.create_upload_session_post_request_body import (
    CreateUploadSessionPostRequestBody
)
from msgraph.generated.models.drive_item_uploadable_properties import DriveItemUploadableProperties
from .o365 import O365Client
from ..conf import (
    SHAREPOINT_APP_ID,
    SHAREPOINT_APP_SECRET,
    SHAREPOINT_TENANT_ID,
    SHAREPOINT_TENANT_NAME
)

class SharepointClient(O365Client):
    """
    SharePoint Client - Migrated to Microsoft Graph SDK

    Uses Microsoft Graph SDK for all SharePoint operations.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Default credentials for SharePoint-specific config
        self._default_tenant_id = SHAREPOINT_TENANT_ID
        self._default_client_id = SHAREPOINT_APP_ID
        self._default_client_secret = SHAREPOINT_APP_SECRET
        self._default_tenant_name = SHAREPOINT_TENANT_NAME

        # SharePoint-specific properties
        self.directory: Optional[str] = None
        self.filename: Optional[str] = None
        self._srcfiles: List = []
        self._destination: List = []

        # Upload settings
        self.small_file_threshold = 4 * 1024 * 1024  # 4 MB
        self.chunk_size = 10 * 1024 * 1024  # 10 MB

        # Cached SharePoint objects
        self._site_id: Optional[str] = None
        self._drive_id: Optional[str] = None
        self._site_info: Optional[DriveItem] = None
        self._drive_info: Optional[DriveItem] = None
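
    # Context for the two thresholds above (based on the generally documented
    # Graph upload limits, which this diff itself does not state): the simple
    # PUT .../content endpoint is meant for small files of roughly 4 MB, hence
    # small_file_threshold, and resumable-session chunks should be multiples
    # of 320 KiB; the 10 MiB chunk_size qualifies
    # (10 * 1024 * 1024 == 32 * 320 * 1024).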

    def get_context(self, url: str, *args):
        """
        Backwards-compatibility method.
        Returns the Graph client instead of the office365 context.
        """
        return self.graph_client

    def _start_(self, **kwargs):
        """Initialize SharePoint-specific configuration."""
        # Process URL and site information
        site = f"sites/{self.site}/" if self.site is not None else ""
        self.site_url = f"https://{self.tenant}.sharepoint.com"
        self.url = f"{self.site_url}/{site}".rstrip('/')
        self.logger.info(
            f"SharePoint target: {self.url}"
        )
        return True

    def connection(self):
        """
        Establish the SharePoint connection using the migrated O365Client.

        This replaces the old office365-rest-python-client authentication
        with Microsoft Graph SDK authentication.
        """
        # Use the parent O365Client connection method
        super().connection()

        self.logger.info("SharePoint connection established successfully")
        return self

    async def verify_sharepoint_access(self):
        """Verify SharePoint-specific access and cache site/drive info."""
        try:
            # Resolve and cache site info
            self._site_info = await self._resolve_site()
            self.logger.info(
                f"SharePoint site accessible: {self._site_info.display_name}"
            )

            # Update the URL if a sub-site was detected
            if hasattr(self, '_site_info') and self._site_info:
                # Reconstruct the URL based on the actual site used
                actual_site_path = self._site_info.web_url.split('/sites/')[-1] if self._site_info.web_url else self.site
                self.url = f"https://{self.tenant}.sharepoint.com/sites/{actual_site_path}"
                self.logger.debug(f"Updated SharePoint URL: {self.url}")

        except Exception as e:
            self.logger.error(
                f"SharePoint access verification failed: {e}"
            )
            raise RuntimeError(
                f"SharePoint access verification failed: {e}"
            ) from e

    async def _detect_and_resolve_subsite(self) -> tuple[str, str]:
        """
        Detect if the first part of the directory path is a sub-site.

        Returns:
            tuple: (actual_site_to_use, cleaned_directory_path)
        """
        # Get the directory from _srcfiles
        if not hasattr(self, '_srcfiles') or not self._srcfiles:
            return self.site, ""

        first_file = self._srcfiles[0]
        directory_raw = first_file.get('directory', '') if isinstance(first_file, dict) else ''

        if not directory_raw:
            return self.site, ""

        directory = directory_raw.replace("\\", "/").strip().strip("/")
        if not directory:
            return self.site, ""

        parts = directory.split("/")
        potential_subsite = parts[0]
        remaining_path = "/".join(parts[1:]) if len(parts) > 1 else ""

        # Try to access the potential sub-site
        with contextlib.suppress(Exception):
            subsite_path = f"{self.site}/{potential_subsite}"
            site_identifier = f"{self.tenant}.sharepoint.com:/sites/{subsite_path}"

            self.logger.debug(f"Testing potential sub-site: {site_identifier}")

            # Try to access the sub-site
            site = await self.graph_client.sites.by_site_id(site_identifier).get()

            if site and site.id:
                self.logger.info(f"Detected sub-site: {potential_subsite}")

                # Update all _srcfiles to remove the sub-site part from directory
                for file_spec in self._srcfiles:
                    if isinstance(file_spec, dict) and 'directory' in file_spec:
                        old_dir = file_spec['directory']
                        # Remove the sub-site part
                        clean_parts = old_dir.replace("\\", "/").strip().strip("/").split("/")
                        if len(clean_parts) > 1 and clean_parts[0] == potential_subsite:
                            new_dir = "/".join(clean_parts[1:])
                            file_spec['directory'] = new_dir
                            self.logger.debug(f"Updated directory: '{old_dir}' → '{new_dir}'")

                return subsite_path, remaining_path

        # Not a sub-site, return original
        return self.site, directory

    async def _resolve_site(self) -> DriveItem:
        """Resolve SharePoint site using Graph API with auto sub-site detection."""
        if self._site_info:
            return self._site_info

        try:
            # Detect if we need to use a sub-site
            actual_site, _ = await self._detect_and_resolve_subsite()

            site_path = f"/sites/{actual_site}" if actual_site else ""
            site_identifier = f"{self.tenant}.sharepoint.com:{site_path}"

            self.logger.debug(
                f"Resolving site: {site_identifier}"
            )
            site = await self.graph_client.sites.by_site_id(site_identifier).get()

            if site and site.id:
                self._site_id = site.id
                self._site_info = site
                self.logger.info(
                    f"Site resolved: {site.display_name}"
                )
                return site
            else:
                raise RuntimeError(
                    f"Could not resolve SharePoint site: {site_identifier}"
                )

        except Exception as e:
            raise RuntimeError(
                f"Failed to resolve SharePoint site: {e}"
            ) from e
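
    # Illustrative sketch of the two helpers above (hypothetical values, not
    # part of the diff): with self.site == "ops" and a source spec of
    # {"directory": "HR/Policies/2024"}, _detect_and_resolve_subsite() probes
    # sites.by_site_id("<tenant>.sharepoint.com:/sites/ops/HR"); if Graph
    # resolves it, the method returns ("ops/HR", "Policies/2024") and rewrites
    # the spec's directory to "Policies/2024"; otherwise it returns
    # ("ops", "HR/Policies/2024") unchanged.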

    def _parse_directory_path(self, directory: str) -> tuple[str, str]:
        """
        Parse directory path to extract library name and folder path.

        Examples:
        - "troc/Project Management/Epson/Store and Product MSL"
          → library: "troc", path: "Project Management/Epson/Store and Product MSL"
        - "Shared Documents/Stores/"
          → library: "Shared Documents", path: "Stores"
        - "Documents/folder/subfolder"
          → library: "Documents", path: "folder/subfolder"
        """
        if not directory:
            return "Documents", ""  # Default library

        directory = directory.replace("\\", "/").strip().strip("/")
        if not directory:
            return "Documents", ""

        parts = directory.split("/")

        # First part is the library name
        library_name = parts[0]
        # Rest is the path within that library
        path_within_library = "/".join(parts[1:]) if len(parts) > 1 else ""

        self.logger.debug(
            f"Parsed directory '{directory}' → library: '{library_name}', path: '{path_within_library}'"
        )
        if library_name.lower() == "shared documents":
            library_name = "Documents"

        return library_name, path_within_library

    async def _resolve_drive(self, library_name: str = None) -> DriveItem:
        """Resolve document library drive using Graph API with dynamic library name."""
        if self._drive_info and not library_name:
            return self._drive_info

        try:
            site_info = await self._resolve_site()
            drives = await self.graph_client.sites.by_site_id(site_info.id).drives.get()
            if drives and drives.value:
                self.logger.debug(
                    f"Available libraries: {[d.name for d in drives.value]}"
                )

                # If library_name specified, try to find it
                if library_name:
                    for drive in drives.value:
                        if drive.name.lower() == library_name.lower():  # Case-insensitive match
                            self.logger.info(f"Found library: {drive.name}")
                            # Don't cache if we're doing a specific lookup
                            return drive

                    # Library not found by name, log available options
                    available_names = [d.name for d in drives.value]
                    self.logger.warning(
                        f"Library '{library_name}' not found. Available: {available_names}"
                    )

                    # Try common name mappings
                    if library_name.lower() == "shared documents":
                        for drive in drives.value:
                            if drive.name.lower() in ["documents", "shared documents"]:
                                self.logger.info(f"Using '{drive.name}' for 'Shared Documents'")
                                return drive

                    raise RuntimeError(
                        f"Library '{library_name}' not found. Available: {available_names}"
                    )

                # No specific library requested, use cached or default
                if self._drive_info:
                    return self._drive_info

                # Default to the first drive and cache it
                default_drive = drives.value[0]
                self._drive_id = default_drive.id
                self._drive_info = default_drive
                self.logger.info(f"Using default library: {default_drive.name}")
                return default_drive

            raise RuntimeError(
                f"No document libraries found in site: {site_info.display_name}"
            )

        except Exception as e:
            raise RuntimeError(f"Failed to resolve document library: {e}") from e
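
    # Hypothetical walk-through of the two resolvers above (illustrative
    # names): _parse_directory_path("troc/Reports/2024") returns
    # ("troc", "Reports/2024"); _resolve_drive("troc") then lists the site's
    # drives and returns the one whose name matches "troc" case-insensitively,
    # maps the "Shared Documents" alias onto a "Documents" drive, and raises
    # with the list of available library names if nothing matches.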

    async def _ensure_folder(self, folder_path: str, create: bool = True, drive_id: str = None) -> DriveItem:
        """Ensure folder exists using Graph API, optionally in a specific library."""

        # If no drive_id specified, get the default drive
        if not drive_id:
            drive_info = await self._resolve_drive()
            drive_id = drive_info.id

        folder_path = (folder_path or "").strip("/")
        if not folder_path:
            # Return root folder of the specified drive
            root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
            return root

        # Try to resolve existing folder
        try:
            folder_item = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(f"root:/{folder_path}:").get()
            if folder_item:
                return folder_item
        except Exception:
            if not create:
                raise

        # Create folder recursively
        root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
        parent_id = root.id

        for segment in [s for s in folder_path.split("/") if s]:
            # Check if segment already exists
            children = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(parent_id).children.get()

            existing_folder = None
            if children and children.value:
                for child in children.value:
                    if child.name == segment and child.folder:
                        existing_folder = child
                        break

            if existing_folder:
                parent_id = existing_folder.id
                continue

            # Create new folder
            new_folder = DriveItem()
            new_folder.name = segment
            new_folder.folder = Folder()
            new_folder.additional_data = {
                "@microsoft.graph.conflictBehavior": "replace"
            }

            created = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(parent_id).children.post(new_folder)
            parent_id = created.id
            self.logger.info(f"Created folder: {segment}")

        # Return the final folder
        return await self.graph_client.drives.by_drive_id(drive_id)\
            .items.by_drive_item_id(parent_id).get()
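
    # Sketch of _ensure_folder for "Reports/2024/Q1" when only "Reports"
    # exists (assumed scenario): the direct "root:/Reports/2024/Q1:" lookup
    # raises, so the loop walks from root, reuses the existing "Reports"
    # child, POSTs DriveItems (folder=Folder(), conflictBehavior "replace")
    # for "2024" and then "Q1", and finally GETs and returns the "Q1" item.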

    async def _build_full_path(self, drive_id: str, parent_id: str, filename: str) -> str:
        """Return path relative to drive root: e.g. 'Shared Documents/Sub/Folder/output.pptx'."""
        parent = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(parent_id).get()
        base = (parent.parent_reference.path or "")  # e.g. '/drives/{driveId}/root:/Shared Documents/Sub/Folder'
        # Strip the "/drives/{id}/root:" prefix to make it drive-root relative
        marker = "/root:"
        idx = base.find(marker)
        if idx != -1:
            base = base[idx + len(marker):]
        base = base.strip("/")
        # Path already points to the parent folder itself, so just append the filename
        return f"{base}/{filename}".strip("/")

    async def _upload_small_file(self, drive_id, parent_id, local_path, target_name):
        try:
            async with aiofiles.open(local_path, "rb") as f:
                content = await f.read()

            # URL encode the target name to handle special characters
            encoded_name = quote(target_name)

            # Use the direct content upload endpoint for small files with conflict behavior
            # PUT /drives/{driveId}/items/{parentId}:/{filename}:/content?@microsoft.graph.conflictBehavior=replace
            request_path = f"{parent_id}:/{encoded_name}:"

            # The Graph SDK may not support query parameters directly on the content endpoint
            # So we use the basic path and let the SDK handle the upload
            return await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(request_path).content.put(content)  # noqa
        except Exception as e:
            raise RuntimeError(f"Small file upload failed for {target_name}: {e}") from e

    async def _create_upload_session(self, drive_id: str, parent_id: str, target_name: str) -> UploadSession:
        try:
            body = CreateUploadSessionPostRequestBody()
            body.item = DriveItemUploadableProperties()
            # no "name" here; filename is in the URL
            body.item.additional_data = {"@microsoft.graph.conflictBehavior": "replace"}

            # URL encode the target name to handle special characters
            encoded_name = quote(target_name)

            # POST /drives/{driveId}/items/{parentId}:/{fileName}:/createUploadSession
            return await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(f"{parent_id}:/{encoded_name}:/")\
                .create_upload_session.post(body)

        except Exception as e:
            raise RuntimeError(f"Upload session creation failed for {target_name}: {e}") from e
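
    # Both helpers above rely on Graph's colon addressing: an item-id slot may
    # carry "{parentId}:/{name}:" (a path relative to a known item) instead of
    # a bare id. With a hypothetical parent_id "01ABC" and target_name
    # "Q1 report.pptx", the addressed id becomes "01ABC:/Q1%20report.pptx:"
    # once quote() has encoded the space.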

    async def _upload_large_file(
        self,
        upload_session: UploadSession,
        local_path: Union[str, Path]
    ) -> DriveItem:
        """Upload large file using resumable upload session."""

        file_size = os.path.getsize(local_path)
        uploaded = 0
        async with aiohttp.ClientSession() as session:
            async with aiofiles.open(local_path, "rb") as f:
                with tqdm(total=file_size, unit='B', unit_scale=True, desc=f'Uploading {Path(local_path).name}') as pbar:  # noqa
                    while uploaded < file_size:
                        chunk = await f.read(self.chunk_size)
                        if not chunk:
                            break

                        start = uploaded
                        end = uploaded + len(chunk) - 1

                        headers = {
                            "Content-Length": str(len(chunk)),
                            "Content-Range": f"bytes {start}-{end}/{file_size}"
                        }

                        async with session.put(
                            upload_session.upload_url,
                            headers=headers,
                            data=chunk
                        ) as response:
                            if response.status in (200, 201):
                                # Upload complete
                                pbar.update(file_size - uploaded)
                                result_data = await response.json()

                                # Convert to DriveItem (simplified)
                                drive_item = DriveItem()
                                drive_item.name = result_data.get('name')
                                drive_item.size = result_data.get('size')
                                drive_item.web_url = result_data.get('webUrl')
                                drive_item.additional_data = result_data

                                return drive_item

                            elif response.status == 202:
                                # Continue uploading
                                uploaded = end + 1
                                pbar.update(len(chunk))

                                # Check for Retry-After header
                                if (retry_after := response.headers.get('Retry-After')):
                                    await asyncio.sleep(int(retry_after))
                                continue

                            else:
                                error_text = await response.text()
                                raise RuntimeError(
                                    f"Chunk upload failed: {response.status} {error_text}"
                                )

        raise RuntimeError(
            "Upload session completed without final item response"
        )
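
    # The ranges the loop above produces for a hypothetical 25 MiB file
    # (26,214,400 bytes) with the default 10 MiB chunk_size:
    #
    #   Content-Range: bytes 0-10485759/26214400
    #   Content-Range: bytes 10485760-20971519/26214400
    #   Content-Range: bytes 20971520-26214399/26214400
    #
    # A 202 response means "send the next chunk" (after Retry-After seconds,
    # if given); a 200/201 closes the session and returns the final item.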

    def _normalize_directory(self, directory: str, drive_info) -> str:
        """
        Normalize a SharePoint directory path so it is **relative to the drive root**.

        Accepts inputs like:
          - "Project Management/Epson/Store and Product MSL"
          - "Documents/Project Management/Epson/Store and Product MSL"
          - "Shared Documents/Project Management/..."
          - "sites/<site>/Shared Documents/Project Management/..."
          - "troc/Documents/Project Management/..."       # stray tenant/site prefix
          - "/drives/<id>/root:/Project Management/..."   # SDK-style path
          - "/drive/root:/Project Management/..."

        Returns a drive-root relative string such as:
          "Project Management/Epson/Store and Product MSL" (or "" for library root)
        """
        if not directory:
            return ""

        p = directory.replace("\\", "/").strip()
        if not p:
            return ""

        self.logger.debug(f"Normalizing directory: '{directory}' -> initial clean: '{p}'")

        # If path contains a /root: prefix, strip everything up to it.
        if "/root:" in p:
            p = p.split("/root:", 1)[1]

        # Strip any leading "root:" marker and leading/trailing slashes.
        # (removeprefix, not lstrip: lstrip("root:") strips any leading
        # 'r'/'o'/'t'/':' characters and would mangle names such as "troc/...")
        p = p.removeprefix("root:").strip("/")

        # Split into parts and work in lower case for comparisons
        parts = [seg for seg in p.split("/") if seg]
        lower = [seg.lower() for seg in parts]

        # Helper: return remainder after a given index
        def after(idx: int) -> str:
            return "/".join(parts[idx + 1:])

        # 1) If the path contains a document library segment, keep only what's after it
        for i, seg in enumerate(lower):
            if seg in ("shared documents", "documents"):
                normalized = after(i)
                self.logger.debug(f"Removed library segment '{parts[i]}', remaining: '{normalized}'")
                self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
                return normalized

        # 2) If it starts with 'sites/<sitename>/*', drop those two segments, then
        #    drop a leading library name if it immediately follows.
        if len(lower) >= 2 and lower[0] == "sites":
            parts = parts[2:]
            lower = lower[2:]
            if parts:
                if lower and lower[0] in ("shared documents", "documents"):
                    parts = parts[1:]
                normalized = "/".join(parts)
                self.logger.debug(f"Removed '/sites/<site>/' prefix, remaining: '{normalized}'")
                self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
                return normalized

        # 3) If the first segment equals the site/tenant name (stray prefix), drop it and retry library removal
        stray_prefixes = set()
        if getattr(self, "site", None):
            stray_prefixes.add(str(self.site).strip("/").lower())
        if getattr(self, "tenant", None):
            stray_prefixes.add(str(self.tenant).strip("/").lower())

        if lower and lower[0] in stray_prefixes:
            parts = parts[1:]
            lower = lower[1:]
            # If next is a library name, drop it as well
            if lower and lower[0] in ("shared documents", "documents"):
                parts = parts[1:]
            normalized = "/".join(parts)
            self.logger.debug(f"Removed stray site/tenant prefix, remaining: '{normalized}'")
            self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
            return normalized

        # Otherwise assume it's already drive-root relative
        normalized = "/".join(parts)
        self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
        return normalized
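
    # Expected mappings for the documented inputs, assuming site == tenant ==
    # "troc" (a sketch derived from the branches above, not test output):
    #
    #   "Shared Documents/Project Management/X"  ->  "Project Management/X"
    #   "sites/troc/Shared Documents/Reports"    ->  "Reports"
    #   "troc/Documents/Reports"                 ->  "Reports"
    #   "/drives/abc123/root:/Reports/2024"      ->  "Reports/2024"
    #   "Reports/2024"                           ->  "Reports/2024"
    #   ""                                       ->  ""   (library root)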

    def _to_colon_id(self, directory: str, name: str) -> str:
        """
        Build a root-based colon id with URL-encoded segments:
        "root:/dir1/dir2/file.ext:"
        """
        dir_clean = "/".join(quote(seg, safe="") for seg in (directory.strip("/").split("/") if directory else []))
        name_enc = quote(name, safe="")
        return f"root:/{dir_clean}/{name_enc}:" if dir_clean else f"root:/{name_enc}:"
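
    # Example (sketch): _to_colon_id("Reports/2024", "Q1 Summary.xlsx")
    # -> "root:/Reports/2024/Q1%20Summary.xlsx:". Each segment is quoted with
    # safe="" so a slash embedded in a single name cannot alter the path,
    # while the separators between segments stay literal.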

    async def upload_files(
        self,
        filenames: Optional[List[Union[Path, PurePath, str]]] = None,
        destination: Optional[str] = None,
        destination_filenames: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Upload files to SharePoint using Microsoft Graph API.

        This replaces the old office365-rest-python-client upload method.
        """
        if not filenames:
            filenames = getattr(self, '_srcfiles', [])

        target_folder = destination or getattr(self, 'directory', '')
        # Validate destination names (if provided)
        if destination_filenames is not None and len(destination_filenames) != len(filenames):
            raise RuntimeError(
                "destination_filenames length must match filenames length"
            )

        # Parse the directory to extract library and path (same as file_search and file_lookup)
        library_name, path_within_library = self._parse_directory_path(target_folder)

        # Get the specific library
        try:
            drive_info = await self._resolve_drive(library_name)
            self.logger.debug(f"Using library: {drive_info.name} (ID: {drive_info.id})")
        except Exception as e:
            self.logger.error(f"Failed to access library '{library_name}': {e}")
            # Fall back to the default library
            drive_info = await self._resolve_drive()

        # Ensure the target folder exists using the path within the library
        folder_info = await self._ensure_folder(path_within_library, create=True, drive_id=drive_info.id)

        results: List[Dict[str, Any]] = []

        target_folder = path_within_library or '/'

        for idx, file_path in enumerate(filenames):
            file_path = Path(file_path)

            if not file_path.exists():
                self.logger.error(f"❌ File not found: {file_path}")
                continue

            # Desired name in SharePoint (rename)
            target_name = (
                destination_filenames[idx] if destination_filenames else file_path.name
            )

            try:
                file_size = file_path.stat().st_size
                self.logger.notice(
                    f"Uploading {file_path.name} → {target_name} "
                    f"to '{target_folder}' ({file_size:,} bytes)"
                )

                if file_size <= self.small_file_threshold:
                    # Small file upload
                    result = await self._upload_small_file(
                        drive_info.id,
                        folder_info.id,
                        file_path,
                        target_name,
                    )
                else:
                    # Large file upload
                    upload_session = await self._create_upload_session(
                        drive_info.id,
                        folder_info.id,
                        target_name
                    )
                    result = await self._upload_large_file(upload_session, file_path)

                self.logger.info(f"Uploaded successfully: {result.name}")
                # Build server-relative path including subfolders and renamed file
                server_relative_path = await self._build_full_path(
                    drive_info.id, folder_info.id, target_name
                )
                if hasattr(result, 'web_url') and result.web_url:
                    self.logger.info(f"SharePoint URL: {result.web_url}")

                # Backwards compatibility format
                results.append({
                    "filename": {
                        "name": result.name,
                        "size": getattr(result, 'size', file_size),
                        "web_url": getattr(result, 'web_url', ''),
                        "serverRelativeUrl": f"/{server_relative_path}",
                    }
                })

            except Exception as e:
                self.logger.error(f"Upload failed for {target_name}: {e}")
                raise RuntimeError(f"Upload failed for {target_name}: {e}") from e

        return results
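
    # Hypothetical usage (names illustrative): with a connected client `sp`,
    # upload two local files into a sub-folder of the "troc" library, renaming
    # the second on the way up:
    #
    #   results = await sp.upload_files(
    #       filenames=["./out/summary.pdf", "./out/raw.csv"],
    #       destination="troc/Reports/2024",
    #       destination_filenames=["summary.pdf", "raw-2024.csv"],
    #   )
    #   # each entry: {"filename": {"name": ..., "size": ..., "web_url": ...,
    #   #              "serverRelativeUrl": "/<path within drive>/<name>"}}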

    async def test_permissions(self) -> Dict[str, Any]:
        """
        Test SharePoint permissions using Microsoft Graph API.

        This replaces the old office365-rest-python-client permission test.
        """
        results = {
            "site_access": False,
            "folder_access": False,
            "upload_access": False,
            "errors": []
        }

        try:
            # Test 1: Site access
            site_info = await self._resolve_site()
            results["site_access"] = True
            self.logger.info(f"Site access: {site_info.display_name}")

            # Test 2: Drive access
            drive_info = await self._resolve_drive()
            results["folder_access"] = True
            self.logger.info(f"Drive access: {drive_info.name}")

            # Test 3: Folder creation (upload capability test)
            test_folder = await self._ensure_folder("test-folder-permissions", create=True)
            results["upload_access"] = True
            self.logger.info("Upload permissions confirmed")

            # Clean up test folder
            with contextlib.suppress(Exception):
                await self.graph_client.drives.by_drive_id(
                    drive_info.id
                ).items.by_drive_item_id(test_folder.id).delete()
                self.logger.info("Test folder cleaned up")

        except Exception as e:
            results["errors"].append(str(e))
            self.logger.error(f"Permission test failed: {e}")

        return results

    async def upload_folder(
        self,
        local_folder: PurePath,
        destination: str = None,
        destination_filenames: Optional[List[str]] = None,
    ):
        """
        Upload an entire folder to SharePoint using Microsoft Graph API.

        Args:
            local_folder: Local folder path to upload
            destination: SharePoint destination folder (optional)
            destination_filenames: Optional per-file renames, aligned with the
                files discovered in the folder

        Returns:
            List of upload results
        """
        try:
            local_path = Path(local_folder)
            if not local_path.exists() or not local_path.is_dir():
                raise FileNotFoundError(
                    f"Local folder does not exist or is not a directory: {local_folder}"
                )

            # Get all files in the folder recursively
            all_files = []
            all_files.extend(
                file_path
                for file_path in local_path.rglob("*")
                if file_path.is_file()
            )

            if not all_files:
                self.logger.warning(
                    f"No files found in folder: {local_folder}"
                )
                return []

            self.logger.debug(
                f"Uploading folder with {len(all_files)} files from {local_folder}"
            )

            # Use the existing upload_files method for each file
            results = []
            target_folder = destination or getattr(self, 'directory', 'Shared Documents')

            # Group files by their relative directory structure
            for idx, file_path in enumerate(all_files):
                # Calculate the path relative to the source folder
                relative_path = file_path.relative_to(local_path)

                # If the file is in a subdirectory, include that in the SharePoint path
                if relative_path.parent != Path('.'):
                    file_target_folder = f"{target_folder}/{relative_path.parent}".replace("\\", "/")
                else:
                    file_target_folder = target_folder

                try:
                    self.logger.debug(
                        f"Uploading {relative_path} to {file_target_folder}"
                    )

                    # Upload a single file to the appropriate folder
                    file_result = await self.upload_files(
                        filenames=[file_path],
                        destination=file_target_folder,
                        destination_filenames=[destination_filenames[idx]] if destination_filenames else None
                    )
                    results.extend(file_result)

                except Exception as e:
                    self.logger.error(f"Failed to upload {relative_path}: {e}")
                    # Continue with other files even if one fails
                    continue

            self.logger.info(
                f"Folder upload completed. {len(results)} files uploaded successfully."
            )
            return results

        except Exception as e:
            self.logger.error(f"Folder upload failed: {e}")
            raise RuntimeError(
                f"Folder upload failed: {e}"
            ) from e
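
    # Sketch of the directory mapping above (hypothetical tree): uploading
    # local folder "./site" with destination "troc/Static" sends
    #
    #   ./site/index.html      -> troc/Static
    #   ./site/css/main.css    -> troc/Static/css
    #   ./site/img/logo/a.png  -> troc/Static/img/logo
    #
    # i.e. each file's parent path, taken relative to the source folder, is
    # appended to the destination before delegating to upload_files().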

    async def create_subscription(
        self,
        library_id: str,
        webhook_url: str,
        client_state: str = "secret_string",
        expiration_days: int = 1
    ) -> dict:
        """Create webhook subscription using Graph API."""
        try:
            # Set up expiration for the subscription (max 180 days)
            expiration_date = datetime.now(timezone.utc) + timedelta(days=expiration_days)
            # isoformat() on an aware datetime already carries "+00:00", so
            # swap that suffix for "Z" rather than appending a second marker
            expiration_datetime = expiration_date.isoformat().replace("+00:00", "Z")

            # Use Graph SDK for subscription creation
            subscription = Subscription()
            subscription.change_type = "created,updated,deleted"
            subscription.notification_url = webhook_url
            subscription.resource = f"sites/{self.tenant}/lists/{library_id}"
            subscription.expiration_date_time = expiration_datetime
            subscription.client_state = client_state

            # Create subscription using Graph SDK
            created_subscription = await self.graph_client.subscriptions.post(subscription)

            self.logger.info("✅ Subscription created successfully")
            return {
                "id": created_subscription.id,
                "resource": created_subscription.resource,
                "notification_url": created_subscription.notification_url,
                "expiration_date_time": created_subscription.expiration_date_time
            }

        except Exception as e:
            self.logger.error(f"❌ Failed to create subscription: {e}")
            raise RuntimeError(f"Failed to create subscription: {e}") from e
828
|
+
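    # Usage sketch (hypothetical values; `client` is a configured instance of
    # this class):
    #
    #     sub = await client.create_subscription(
    #         library_id="<library-id>",
    #         webhook_url="https://example.com/hooks/sharepoint",
    #         client_state="my-shared-secret",
    #         expiration_days=2,
    #     )
    #     print(sub["id"], sub["expiration_date_time"])
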
    async def get_library_id(self, absolute_url: str) -> str:
        """Get library ID using Graph API."""
        try:
            # Parse the absolute URL to get site and document library path
            parsed_url = urlparse(absolute_url)
            path_parts = parsed_url.path.strip("/").split("/")

            # Format the site name and library path; keep the leading 'sites/'
            # segment so Graph path addressing resolves
            site_name = "/".join(path_parts[:2])  # e.g., 'sites/mysite'
            library_name = "/".join(path_parts[2:])  # e.g., 'Documents'

            # Use Graph SDK to get library info
            site_identifier = f"{self.tenant}.sharepoint.com:/{site_name}"
            site = await self.graph_client.sites.by_site_id(site_identifier).get()

            # Get drives for the site
            drives = await self.graph_client.sites.by_site_id(site.id).drives.get()

            if drives and drives.value:
                for drive in drives.value:
                    if library_name in drive.name or drive.name == "Documents":
                        self.logger.info(
                            f"📋 Library ID for {absolute_url} is {drive.id}"
                        )
                        return drive.id

            raise RuntimeError("Library not found")

        except Exception as e:
            raise RuntimeError(f"Failed to retrieve library ID: {e}") from e

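    # Usage sketch (hypothetical URL): resolve a library before subscribing to
    # its change notifications:
    #
    #     library_id = await client.get_library_id(
    #         "https://contoso.sharepoint.com/sites/mysite/Shared Documents"
    #     )
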
    async def close(self):
        """Clean up resources."""
        await super().close()
        self._site_info = None
        self._drive_info = None
        self._site_id = None
        self._drive_id = None

    def _pattern_is_api_safe(self, pattern: str) -> bool:
        """
        Return True if 'pattern' can be safely passed to Graph search (no wildcards/regex),
        otherwise False (e.g., contains * ? [ ] { } ( ) ^ $ | or a backslash).
        """
        return not re.search(r'[*?\[\]\{\}\(\)\^\$|\\]', pattern or "")

    def _in_dir(self, path_rel_to_drive: str, dir_rel_to_drive: str) -> bool:
        """Return True when 'path_rel_to_drive' sits at or below 'dir_rel_to_drive' (case-insensitive)."""
        p = (path_rel_to_drive or "").strip("/").lower()
        d = (dir_rel_to_drive or "").strip("/").lower()
        # Compare whole path segments so e.g. 'Reports2' does not match 'Reports'
        return (p == d or p.startswith(f"{d}/")) if d else True

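    # Behavior sketch (hypothetical values):
    #
    #     self._pattern_is_api_safe("report.xlsx")  # True  (literal, safe for API search)
    #     self._pattern_is_api_safe("mkdocs*.yml")  # False (wildcard, recurse instead)
    #     self._in_dir("Reports/2024/a.xlsx", "Reports")  # True
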
    async def download_found_files(
        self,
        found: List[Dict[str, Any]],
    ) -> List[Dict[str, str]]:
        """
        Download all items in 'found' (from file_search) into local self.directory.
        Uses aiofiles for writing and httpx (downloadUrl) for streaming.
        If self._filenames is provided and its length matches len(found),
        files are renamed accordingly; otherwise warn and keep original names.

        Returns: List[{"filename": <local_path>, "download_url": <url or "">}]
        """
        results: List[Dict[str, str]] = []

        # Ensure local destination directory exists
        dest_dir = Path(getattr(self, "directory", ".")).expanduser().resolve()
        dest_dir.mkdir(parents=True, exist_ok=True)

        # Handle desired names
        desired_names = getattr(self, "_filenames", None)
        if desired_names and len(desired_names) != len(found):
            self.logger.warning(
                f"⚠️ Matched files ({len(found)}) != self._filenames ({len(desired_names)}). "
                f"Will keep original names."
            )
            desired_names = None

        def _sanitize(name: str) -> str:
            # make it a safe filename for local FS
            name = Path(name).name  # strip any path
            return re.sub(r'[\\/:*?"<>|]+', "_", name).strip()

        # Resolve drive once for content fallback (if needed)
        drive_info = await self._resolve_drive()
        drive_id = drive_info.id

        async with httpx.AsyncClient(follow_redirects=True, timeout=None) as client:
            for idx, entry in enumerate(found):
                item = entry.get("item")
                if not item or not getattr(item, "name", None):
                    self.logger.warning("Skipping entry without a valid drive item")
                    continue

                # Determine target local filename
                target_name = _sanitize(desired_names[idx]) if desired_names else _sanitize(item.name)
                dest_path = dest_dir / target_name

                # Get pre-authenticated download URL if available
                download_url = ""
                try:
                    add = getattr(item, "additional_data", {}) or {}
                    download_url = add.get("@microsoft.graph.downloadUrl", "") or ""
                except Exception:
                    download_url = ""

                self.logger.debug(
                    f"⬇️ Downloading {item.name} → {dest_path.name}"
                )

                try:
                    if download_url:
                        # Stream via downloadUrl
                        async with client.stream("GET", download_url) as resp:
                            resp.raise_for_status()
                            async with aiofiles.open(dest_path, "wb") as f:
                                async for chunk in resp.aiter_bytes(1 << 20):  # 1 MiB
                                    await f.write(chunk)
                    else:
                        # Fallback: GET /content via Graph (loads into memory)
                        data = await self.graph_client.drives.by_drive_id(drive_id)\
                            .items.by_drive_item_id(item.id).content.get()
                        async with aiofiles.open(dest_path, "wb") as f:
                            await f.write(data)

                    self.logger.debug(
                        f"✅ Saved: {dest_path}"
                    )
                    results.append(
                        {"filename": str(dest_path), "download_url": download_url}
                    )
                except Exception as e:
                    self.logger.error(
                        f"❌ Download failed for {item.name}: {e}"
                    )
                    # Continue with the rest; do not raise to allow partial completion

        return results

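    # Pipeline sketch (hypothetical: assumes `self._srcfiles` and `self._filenames`
    # were populated by the component's configuration):
    #
    #     found = await client.file_search()
    #     saved = await client.download_found_files(found)
    #     # -> [{"filename": "/local/dir/report.xlsx", "download_url": "https://..."}, ...]
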
    async def file_search(self) -> List[Dict[str, Any]]:
        """
        Search for files with the Graph API (when the pattern is API-safe), with a
        recursive fallback starting at the target folder.
        Does not raise on desired-name count mismatches.
        """
        destinations: List[Dict[str, Any]] = []

        try:
            for spec in getattr(self, "_srcfiles", []):
                directory_raw: str = (spec.get("directory") or "").strip()
                pattern: str = spec.get("pattern") or spec.get("filename") or ""
                extension = (spec.get("extension") or "").strip()
                wanted_ext: Optional[str] = extension.lower().lstrip(".") if extension else None

                if not directory_raw:
                    raise RuntimeError("file_search: each spec must include a 'directory'")

                # Parse the directory to extract library and path (same as file_lookup)
                library_name, directory = self._parse_directory_path(directory_raw)

                # Get the specific library
                try:
                    drive_info = await self._resolve_drive(library_name)
                    drive_id = drive_info.id
                    self.logger.debug(f"Using library: {drive_info.name} (ID: {drive_id})")
                except Exception as e:
                    self.logger.error(f"Failed to access library '{library_name}': {e}")
                    continue

                found_files: List[Dict[str, Any]] = []

                # Try API search only if pattern is simple/safe
                if pattern and self._pattern_is_api_safe(pattern):
                    # A conservative sanitizer for Graph search term
                    clean_q = re.sub(r"[^A-Za-z0-9._-]+", " ", pattern).strip()
                    if len(clean_q) >= 2:
                        try:
                            self.logger.debug(f"Attempting API search with term: '{clean_q}'")
                            api_res = await self.graph_client.drives.by_drive_id(drive_id).search_with_q(clean_q).get()  # noqa
                            if api_res and api_res.value:
                                self.logger.debug(f"API search returned {len(api_res.value)} result(s)")
                                for item in api_res.value:
                                    if not getattr(item, "file", None) or not item.name:
                                        continue
                                    name = item.name
                                    # Extension check
                                    ext_ok = True
                                    if wanted_ext:
                                        ext_ok = name.lower().endswith(f".{wanted_ext}")
                                    # Pattern check (supports literal/regex/wildcard via existing helper)
                                    name_ok = self._matches_pattern(name, pattern)
                                    item_path = self._get_item_path_from_item(item)  # drive-root relative

                                    self.logger.info(
                                        f"🔎 [API] {item_path} "
                                        f"(ext: {'✓' if ext_ok else '×'}, name: {'✓' if name_ok else '×'})"
                                    )

                                    if ext_ok and name_ok and self._in_dir(item_path, directory):
                                        found_files.append({
                                            "item": item,
                                            "path": item_path,
                                            "server_relative_url": item_path
                                        })
                                        self.logger.info(f"✅ [API] Match: {item_path}")
                            else:
                                self.logger.debug("API search returned no results")
                        except Exception as api_err:
                            self.logger.warning(f"API search failed: {api_err}")

                # Fallback to recursive (or if API found nothing)
                if not found_files:
                    self.logger.info(
                        f"No API results or pattern not API-safe. Recursive in '{directory or '/'}'..."
                    )
                    found_files = await self._search_pattern_recursive(
                        drive_id=drive_id,
                        directory=directory,
                        pattern=pattern,
                        wanted_ext=wanted_ext
                    )

                if not found_files:
                    err = f"No files found for '{pattern or '<empty>'}' in '{directory_raw}'"
                    self.logger.error(err)
                    raise RuntimeError(err)

                destinations.extend(found_files)
                self.logger.info(
                    f"=== Found {len(found_files)} file(s) for '{pattern or '<empty>'}' ==="
                )

            # Warn (do not fail) on name-count mismatch
            desired = getattr(self, "_filenames", None)
            if desired and len(desired) != len(destinations):
                self.logger.warning(
                    f"⚠️ Matched files ({len(destinations)}) != self._filenames ({len(desired)}). "
                    f"Downloads will keep original names where needed."
                )

            return destinations

        except Exception as e:
            self.logger.error(f"File search failed: {e}")
            raise RuntimeError(f"File search failed: {e}") from e

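    # Spec sketch (hypothetical values): each entry of `self._srcfiles` drives one
    # lookup; 'pattern' may be a literal name, a shell wildcard, or a regex:
    #
    #     self._srcfiles = [
    #         {"directory": "Shared Documents/Reports", "pattern": "mkdocs*.yml"},
    #         {"directory": "Shared Documents/Data", "filename": "sales.xlsx", "extension": "xlsx"},
    #     ]
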
    def _matches_pattern(self, filename: str, pattern: str) -> bool:
        """
        Check if filename matches the search pattern with detailed logging.
        """
        try:
            # Convert shell-style wildcards to regex if needed
            if '*' in pattern and '.*' not in pattern:
                # Simple wildcard pattern like "mkdocs*.yml"
                regex_pattern = pattern.replace("*", ".*").replace("?", ".")
                # Add anchors for exact matching
                regex_pattern = f"^{regex_pattern}$"
            elif '.*' in pattern or '[' in pattern or '^' in pattern or '$' in pattern:
                # Already a regex pattern
                regex_pattern = pattern
            else:
                # Exact match
                regex_pattern = f"^{re.escape(pattern)}$"

            # self.logger.debug(f"Pattern matching: '{filename}' against regex '{regex_pattern}'")
            return bool(re.match(regex_pattern, filename, re.IGNORECASE))

        except re.error as e:
            self.logger.warning(
                f"Regex pattern '{pattern}' failed: {e}, falling back to substring match"
            )
            # Remove regex characters and do substring match
            clean_pattern = re.sub(r'[.*+?^${}()|[\]\\]', '', pattern)
            result = clean_pattern.lower() in filename.lower()
            self.logger.debug(f"Substring match result: {result}")
            return result

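    # Matching sketch (hypothetical inputs): wildcards become anchored regexes, and
    # unescaped dots in a wildcard pattern still match any single character:
    #
    #     self._matches_pattern("mkdocs_extra.yml", "mkdocs*.yml")  # True (regex ^mkdocs.*.yml$)
    #     self._matches_pattern("report.xlsx", "report.xlsx")       # True (exact, escaped)
    #     self._matches_pattern("report_old.xlsx", "report.xlsx")   # False
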
    def _get_item_path_from_item(self, item) -> str:
        """
        Extract the full path from a DriveItem object.
        """
        try:
            # Try to get path from parent_reference
            if hasattr(item, 'parent_reference') and item.parent_reference and item.parent_reference.path:
                parent_path = item.parent_reference.path or ""

                # Clean up the parent path
                if parent_path.startswith("/drive/root:"):
                    parent_path = parent_path[12:]
                elif parent_path.startswith("/drives/") and "/root:" in parent_path:
                    parent_path = parent_path.split("/root:")[-1]

                # Build the full path
                if parent_path:
                    full_path = f"{parent_path}/{item.name}".replace("//", "/").lstrip("/")
                else:
                    full_path = item.name or ""

                return full_path

            # Fallback: try to get path from web_url if available
            if hasattr(item, 'web_url') and item.web_url:
                with contextlib.suppress(Exception):
                    web_url = item.web_url

                    # Look for the document library part in the URL
                    if "/Shared%20Documents/" in web_url:
                        path_part = web_url.split("/Shared%20Documents/", 1)[1]
                        return unquote(path_part)
                    elif "/Shared Documents/" in web_url:
                        path_part = web_url.split("/Shared Documents/", 1)[1]
                        return unquote(path_part)

            # Final fallback: just return the filename
            return item.name or ""

        except Exception as e:
            self.logger.debug(f"Error extracting path from item: {e}")
            return item.name or ""

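    # Path-cleanup sketch (hypothetical DriveItem): a parent_reference.path of
    # "/drives/b!abc123/root:/Reports/2024" with item.name "sales.xlsx" resolves
    # to "Reports/2024/sales.xlsx" (drive-root relative).
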
    def _is_in_target_directory(self, file_path: str, target_directory: str) -> bool:
        """
        Check if a file path is within the target directory.
        """
        # Normalize paths
        file_path = file_path.strip().strip("/")
        target_directory = target_directory.strip().strip("/")

        # Remove "Shared Documents" prefix if present in target but not in file path
        if target_directory.startswith("Shared Documents/"):
            target_dir_without_prefix = target_directory[17:]  # Remove "Shared Documents/"
            if target_dir_without_prefix in file_path:
                return True

        # Direct directory match
        if target_directory in file_path:
            return True

        # Check if file is in subdirectory
        file_dir = "/".join(file_path.split("/")[:-1])  # Remove filename
        return file_dir == target_directory or file_dir.endswith(f"/{target_directory}")

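    # Containment sketch (hypothetical paths): the substring checks are
    # deliberately permissive:
    #
    #     self._is_in_target_directory("Reports/2024/a.xlsx", "Shared Documents/Reports")  # True
    #     self._is_in_target_directory("Archive/b.xlsx", "Reports")                        # False
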
    async def _search_pattern_recursive(
        self,
        drive_id: str,
        directory: str,
        pattern: str,
        wanted_ext: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Depth-first search starting at the *target folder*.
        Counts every tested file; the per-file ✓/× log line is kept commented out
        to reduce verbosity.
        """
        matches: List[Dict[str, Any]] = []
        counter = {"tested": 0}

        # Resolve the starting folder using colon-path id (no item_with_path)
        start_path = (directory or "").strip().strip("/")
        if start_path:
            try:
                start_folder = await self.graph_client.drives.by_drive_id(drive_id)\
                    .items.by_drive_item_id(f"root:/{start_path}:/").get()
                self.logger.notice(
                    f"📂 Recursive start: '{start_path}' (ID: {start_folder.id})"
                )
            except Exception as e:
                # If normalization is correct and the folder truly exists (as uploads did),
                # this should not hit. Log and re-raise to avoid silently walking the root.
                self.logger.error(f"❌ Start folder not found for '{start_path}': {e}")
                raise
        else:
            start_folder = await self.graph_client.drives.by_drive_id(drive_id).root.get()
            self.logger.notice("📂 Recursive start: drive root")

        matcher = self._matches_pattern  # existing helper

        async def _dfs(folder_id: str, base_rel_path: str):
            children = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(folder_id).children.get()
            if not children or not children.value:
                return

            for entry in children.value:
                if getattr(entry, "folder", None):
                    sub_rel = f"{base_rel_path}/{entry.name}".strip("/")
                    await _dfs(entry.id, sub_rel)
                    continue
                if not getattr(entry, "file", None) or not entry.name:
                    continue

                name = entry.name.strip()
                # Extension check
                ext_ok = True
                if wanted_ext:
                    ext_ok = name.lower().endswith("." + wanted_ext)

                # Pattern check (supports literal/regex/wildcard)
                name_ok = matcher(name, pattern)

                # Compute full path (drive-root relative) for logging/return
                full_path = await self._get_item_full_path(drive_id, entry.id)

                counter["tested"] += 1
                # self.logger.debug(
                #     f"🔎 [{counter['tested']}] {full_path} "
                #     f"(ext: {'✓' if ext_ok else '×'}, name: {'✓' if name_ok else '×'})"
                # )

                if ext_ok and name_ok:
                    matches.append({
                        "item": entry,
                        "path": full_path,
                        "server_relative_url": full_path
                    })
                    self.logger.info(f"✅ Match: {full_path}")

        await _dfs(start_folder.id, start_path)
        return matches

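    # Addressing sketch: the colon syntax used above resolves items by
    # drive-root-relative path, e.g. the drive-item id "root:/Reports/2024:/"
    # addresses the folder "Reports/2024" directly (folder names hypothetical).
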
    async def _get_item_full_path(self, drive_id: str, item_id: str) -> str:
        """
        Get the full server relative path of an item.
        """
        try:
            item = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(item_id).get()

            if hasattr(item, 'parent_reference') and item.parent_reference:
                parent_path = item.parent_reference.path or ""
                if parent_path.startswith("/drive/root:"):
                    parent_path = parent_path[12:]

                full_path = f"{parent_path}/{item.name}".replace("//", "/").lstrip("/")
                return full_path
            else:
                return item.name or ""

        except Exception as e:
            self.logger.warning(f"Could not get full path for item {item_id}: {e}")
            return ""

    async def _resolve_existing_directory(
        self,
        drive_id: str,
        directory_raw: str,
        drive_info,
    ):
        """
        Resolve an existing folder under the drive.
        Tries the normalized directory; if not found, drops the first path segment and retries.
        Returns (folder_item, used_directory_relative_to_drive_root).
        """
        directory_raw = (directory_raw or "").strip()
        dir_norm = self._normalize_directory(directory_raw, drive_info)

        candidates = []
        if dir_norm:
            candidates.append(dir_norm)
            if "/" in dir_norm:
                candidates.append(dir_norm.split("/", 1)[1])
        else:
            candidates.append("")

        last_err = None
        for cand in candidates:
            colon_id = f"root:/{cand}:/".replace("//", "/") if cand else "root:/"
            try:
                folder = await self.graph_client.drives.by_drive_id(drive_id)\
                    .items.by_drive_item_id(colon_id).get()
                if getattr(folder, "folder", None):
                    self.logger.debug(
                        f"Resolved directory '{directory_raw}' -> '{cand or '/'}' (ID: {folder.id})"
                    )
                    return folder, cand
            except Exception as e:
                last_err = e
                self.logger.debug(
                    f"Directory candidate not found '{cand or '/'}': {e}"
                )

        raise RuntimeError(
            f"Start directory not found: '{directory_raw}' (normalized '{dir_norm}')"
        ) from last_err

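    # Fallback sketch (hypothetical input): a directory like "TROC/Reports" is
    # tried as-is first; if Graph reports no such folder, the leading segment is
    # dropped and "Reports" is retried before raising.
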
    async def file_lookup(
        self,
        files: Optional[List[Dict[str, str]]] = None,
    ) -> Optional[List[Dict[str, Any]]]:
        """
        Resolve exact files (no search) into 'destinations' items.
        Robustly handles extra leading path segments like 'TROC/...'
        and resolves the file under the resolved parent folder ID.
        Returns None when nothing could be resolved.
        """
        specs = files if files is not None else getattr(self, "_srcfiles", [])
        if not specs:
            raise RuntimeError("file_lookup: no files provided and self._srcfiles is empty")

        drive_info = await self._resolve_drive()
        drive_id = drive_info.id

        destinations: List[Dict[str, Any]] = []

        for spec in specs:
            directory_raw: str = (spec.get("directory") or "").strip()
            filename_raw: str = (spec.get("filename") or "").strip()
            if not filename_raw:
                self.logger.warning("file_lookup: skipping entry without 'filename'")
                continue

            # Parse the directory to extract library and path
            library_name, path_within_library = self._parse_directory_path(directory_raw)

            self.logger.notice(
                f"Looking up file: '{filename_raw}' in library '{library_name}', path '{path_within_library}'"
            )

            # Get the specific library
            try:
                drive_info = await self._resolve_drive(library_name)
                drive_id = drive_info.id
            except Exception as e:
                self.logger.error(f"Failed to access library '{library_name}': {e}")
                continue

            # Build the Microsoft Graph API path within the library
            if path_within_library:
                colon_id = f"root:/{path_within_library}/{filename_raw}:"
            else:
                colon_id = f"root:/{filename_raw}:"

            self.logger.debug(
                f"Using Graph API item ID: '{colon_id}' in library '{library_name}'"
            )
            # First try direct by parent folder ID + :/filename:/ (avoids full-path encoding pitfalls)
            try:
                file_item = await self.graph_client.drives.by_drive_id(drive_id)\
                    .items.by_drive_item_id(colon_id).get()
            except Exception as e:
                self.logger.error(
                    f"Direct lookup failed for '{filename_raw}' in library '{library_name}', path '{path_within_library}', error: {e}"
                )

                # Fallback: list children and match by exact name
                try:
                    if path_within_library:
                        # Get the directory first
                        dir_colon_id = f"root:/{path_within_library}:"
                        folder = await self.graph_client.drives.by_drive_id(drive_id)\
                            .items.by_drive_item_id(dir_colon_id).get()
                    else:
                        # Use root of this library
                        folder = await self.graph_client.drives.by_drive_id(drive_id).root.get()

                    # List directory contents
                    children = await self.graph_client.drives.by_drive_id(drive_id)\
                        .items.by_drive_item_id(folder.id).children.get()

                    found_item = None
                    if children and children.value:
                        self.logger.debug(
                            f"Directory contains {len(children.value)} items:"
                        )
                        for child in children.value:
                            self.logger.debug(f"  - {child.name} ({'file' if child.file else 'folder'})")
                            if child.file and child.name and child.name == filename_raw:
                                found_item = child
                                break
                    if found_item:
                        # Bind the fallback hit to 'file_item' so the code below can use it
                        file_item = found_item
                        self.logger.info(f"Found via fallback search: {filename_raw}")
                    else:
                        self.logger.error(f"File '{filename_raw}' not found in library '{library_name}', path '{path_within_library or 'root'}'")
                        continue
                except Exception as e2:
                    self.logger.error(f"Fallback search failed: {e2}")
                    continue

            # Build drive-root-relative path for return/logging
            full_path = await self._get_item_full_path(drive_id, file_item.id)
            destinations.append({
                "item": file_item,
                "path": full_path,
                "server_relative_url": full_path
            })
            self.logger.info(f"✅ Found: {full_path}")

        return destinations or None

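    # Usage sketch (hypothetical spec): exact lookup, no pattern matching:
    #
    #     hits = await client.file_lookup(files=[
    #         {"directory": "Shared Documents/Reports", "filename": "sales.xlsx"},
    #     ])
    #     if hits:
    #         print(hits[0]["path"])
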
    async def debug_root_structure(self):
        """Quick debug to see what's actually at the root of this SharePoint site."""
        try:
            drive_info = await self._resolve_drive()
            drive_id = drive_info.id

            # Get root folder first, then its children
            root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
            children = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(root.id).children.get()

            if children and children.value:
                self.logger.notice("=== ROOT STRUCTURE ===")
                for child in children.value:
                    if child.folder:
                        self.logger.notice(f"📁 {child.name}/")
                    else:
                        self.logger.notice(f"📄 {child.name}")
            else:
                self.logger.error("No items found at root")

        except Exception as e:
            self.logger.error(f"Debug failed: {e}")