ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535) hide show
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,1435 @@
1
+ import os
2
+ import re
3
+ import asyncio
4
+ from typing import List, Optional, Union, Dict, Any
5
+ import contextlib
6
+ from pathlib import Path, PurePath
7
+ from datetime import datetime, timedelta, timezone
8
+ from urllib.parse import urlparse, quote, unquote
9
+ import aiofiles
10
+ from tqdm import tqdm
11
+ import httpx
12
+ import aiohttp
13
+ # Microsoft Graph SDK imports (replacing office365-rest-python-client)
14
+ from msgraph.generated.models.subscription import Subscription
15
+ from msgraph.generated.models.drive_item import DriveItem
16
+ from msgraph.generated.models.folder import Folder
17
+ from msgraph.generated.models.file import File
18
+ from msgraph.generated.models.upload_session import UploadSession
19
+ from msgraph.generated.drives.item.items.item.create_upload_session.create_upload_session_post_request_body import (
20
+ CreateUploadSessionPostRequestBody
21
+ )
22
+ from msgraph.generated.models.drive_item_uploadable_properties import DriveItemUploadableProperties
23
+ from .o365 import O365Client
24
+ from ..conf import (
25
+ SHAREPOINT_APP_ID,
26
+ SHAREPOINT_APP_SECRET,
27
+ SHAREPOINT_TENANT_ID,
28
+ SHAREPOINT_TENANT_NAME
29
+ )
30
+
31
+
32
+ class SharepointClient(O365Client):
33
+ """
34
+ SharePoint Client - Migrated to Microsoft Graph SDK
35
+
36
+ Uses Microsoft Graph SDK for all SharePoint operations.
37
+
38
+ """
39
+
40
def __init__(self, *args, **kwargs):
    """
    Build the client and seed its SharePoint-specific state.

    Every positional and keyword argument is handed to the parent
    ``O365Client`` initializer untouched; afterwards the SharePoint
    defaults, transfer bookkeeping, upload limits and lookup caches
    are put in place.
    """
    super().__init__(*args, **kwargs)

    # Fallback credentials, sourced from the SharePoint settings in conf
    self._default_tenant_id = SHAREPOINT_TENANT_ID
    self._default_client_id = SHAREPOINT_APP_ID
    self._default_client_secret = SHAREPOINT_APP_SECRET
    self._default_tenant_name = SHAREPOINT_TENANT_NAME

    # What to transfer and where (filled in later)
    self.directory: Optional[str] = None
    self.filename: Optional[str] = None
    self._srcfiles: List = list()
    self._destination: List = list()

    # Upload tuning, expressed in bytes
    mebibyte = 1024 * 1024
    self.small_file_threshold = 4 * mebibyte  # 4 MB
    self.chunk_size = 10 * mebibyte  # 10 MB

    # Lazily populated lookups for the resolved site and drive
    self._site_id: Optional[str] = None
    self._drive_id: Optional[str] = None
    self._site_info: Optional[DriveItem] = None
    self._drive_info: Optional[DriveItem] = None
64
+
65
def get_context(self, url: str, *args):
    """
    Compatibility shim for callers written against the office365 context API.

    ``url`` and any extra positional arguments are accepted but not used;
    the Graph client held by this instance is returned in place of an
    office365 context.
    """
    graph = self.graph_client
    return graph
71
+
72
+ def _start_(self, **kwargs):
73
+ """Initialize SharePoint-specific configuration."""
74
+ # Process URL and site information
75
+ site = f"sites/{self.site}/" if self.site is not None else ""
76
+ self.site_url = f"https://{self.tenant}.sharepoint.com"
77
+ self.url = f"{self.site_url}/{site}".rstrip('/')
78
+ self.logger.info(
79
+ f"SharePoint target: {self.url}"
80
+ )
81
+ return True
82
+
83
def connection(self):
    """
    Open the SharePoint connection by delegating to ``O365Client``.

    Authentication is handled entirely by the parent class's
    ``connection()`` (Microsoft Graph SDK based, replacing the former
    office365-rest-python-client flow). A success message is logged
    afterwards.

    Returns:
        The client instance itself.
    """
    # All of the actual connection work lives in the parent class
    super().connection()

    self.logger.info("SharePoint connection established successfully")
    return self
95
+
96
async def verify_sharepoint_access(self):
    """
    Verify SharePoint access and cache the resolved site.

    The site is resolved through ``_resolve_site`` and stored on
    ``self._site_info``; its display name is logged. ``self.url`` is then
    rebuilt from the resolved site's ``web_url`` so it reflects the site
    that was actually used (which may be a sub-site).

    ``self.url`` is left untouched when no site path can be determined,
    i.e. the ``web_url`` has no ``/sites/`` segment (root site) and no
    site is configured on the client.

    Raises:
        RuntimeError: If the site cannot be resolved or inspected; the
            original exception is chained as the cause.
    """
    try:
        # Resolve and cache site info
        self._site_info = await self._resolve_site()
        self.logger.info(
            f"SharePoint site accessible: {self._site_info.display_name}"
        )

        web_url = self._site_info.web_url or ""
        # Take everything after the FIRST '/sites/' marker, and only when
        # the marker is really there: a root-site URL has no such segment
        # (splitting it would return the whole URL as the "path"), and a
        # nested path may itself contain '/sites/' again.
        _, marker, actual_site_path = web_url.partition('/sites/')
        if not marker:
            # No usable web_url: fall back to the configured site, if any
            actual_site_path = self.site or ""
        actual_site_path = actual_site_path.rstrip('/')

        if actual_site_path:
            self.url = f"https://{self.tenant}.sharepoint.com/sites/{actual_site_path}"
            self.logger.debug(f"Updated SharePoint URL: {self.url}")

    except Exception as e:
        self.logger.error(
            f"SharePoint access verification failed: {e}"
        )
        raise RuntimeError(
            f"SharePoint access verification failed: {e}"
        ) from e
119
+
120
+ async def _detect_and_resolve_subsite(self) -> tuple[str, str]:
121
+ """
122
+ Detect if the first part of the directory path is a sub-site.
123
+
124
+ Returns:
125
+ tuple: (actual_site_to_use, cleaned_directory_path)
126
+ """
127
+ # Get the directory from _srcfiles
128
+ if not hasattr(self, '_srcfiles') or not self._srcfiles:
129
+ return self.site, ""
130
+
131
+ first_file = self._srcfiles[0]
132
+ directory_raw = first_file.get('directory', '') if isinstance(first_file, dict) else ''
133
+
134
+ if not directory_raw:
135
+ return self.site, ""
136
+
137
+ directory = directory_raw.replace("\\", "/").strip().strip("/")
138
+ if not directory:
139
+ return self.site, ""
140
+
141
+ parts = directory.split("/")
142
+ potential_subsite = parts[0]
143
+ remaining_path = "/".join(parts[1:]) if len(parts) > 1 else ""
144
+
145
+ # Try to access the potential sub-site
146
+ with contextlib.suppress(Exception):
147
+ subsite_path = f"{self.site}/{potential_subsite}"
148
+ site_identifier = f"{self.tenant}.sharepoint.com:/sites/{subsite_path}"
149
+
150
+ self.logger.debug(f"Testing potential sub-site: {site_identifier}")
151
+
152
+ # Try to access the sub-site
153
+ site = await self.graph_client.sites.by_site_id(site_identifier).get()
154
+
155
+ if site and site.id:
156
+ self.logger.info(f"Detected sub-site: {potential_subsite}")
157
+
158
+ # Update all _srcfiles to remove the sub-site part from directory
159
+ for file_spec in self._srcfiles:
160
+ if isinstance(file_spec, dict) and 'directory' in file_spec:
161
+ old_dir = file_spec['directory']
162
+ # Remove the sub-site part
163
+ clean_parts = old_dir.replace("\\", "/").strip().strip("/").split("/")
164
+ if len(clean_parts) > 1 and clean_parts[0] == potential_subsite:
165
+ new_dir = "/".join(clean_parts[1:])
166
+ file_spec['directory'] = new_dir
167
+ self.logger.debug(f"Updated directory: '{old_dir}' → '{new_dir}'")
168
+
169
+ return subsite_path, remaining_path
170
+
171
+ # Not a sub-site, return original
172
+ return self.site, directory
173
+
174
async def _resolve_site(self) -> DriveItem:
    """
    Look up the SharePoint site through Graph, detecting sub-sites first.

    A previously resolved site is returned straight from
    ``self._site_info``. Otherwise ``_detect_and_resolve_subsite`` picks
    the site path to use, the site is fetched by its
    ``<tenant>.sharepoint.com:<path>`` identifier, and the result is
    stored on ``self._site_id`` / ``self._site_info``.

    Raises:
        RuntimeError: On any failure, including a lookup that returns
            no site or a site without an id; the underlying exception
            is chained as the cause.
    """
    cached = self._site_info
    if cached:
        return cached

    try:
        # Only the site part of the detection result is needed here
        actual_site, _remaining = await self._detect_and_resolve_subsite()

        site_path = ""
        if actual_site:
            site_path = f"/sites/{actual_site}"
        site_identifier = f"{self.tenant}.sharepoint.com:{site_path}"

        self.logger.debug(
            f"Resolving site: {site_identifier}"
        )
        site = await self.graph_client.sites.by_site_id(site_identifier).get()

        if not (site and site.id):
            # Raised inside the try on purpose: it is re-wrapped below
            raise RuntimeError(
                f"Could not resolve SharePoint site: {site_identifier}"
            )

        self._site_id = site.id
        self._site_info = site
        self.logger.info(
            f"Site resolved: {site.display_name}"
        )
        return site

    except Exception as e:
        raise RuntimeError(
            f"Failed to resolve SharePoint site: {e}"
        ) from e
207
+
208
+ def _parse_directory_path(self, directory: str) -> tuple[str, str]:
209
+ """
210
+ Parse directory path to extract library name and folder path.
211
+
212
+ Examples:
213
+ - "troc/Project Management/Epson/Store and Product MSL"
214
+ → library: "troc", path: "Project Management/Epson/Store and Product MSL"
215
+ - "Shared Documents/Stores/"
216
+ → library: "Shared Documents", path: "Stores"
217
+ - "Documents/folder/subfolder"
218
+ → library: "Documents", path: "folder/subfolder"
219
+ """
220
+ if not directory:
221
+ return "Documents", "" # Default library
222
+
223
+ directory = directory.replace("\\", "/").strip().strip("/")
224
+ if not directory:
225
+ return "Documents", ""
226
+
227
+ parts = directory.split("/")
228
+
229
+ # First part is the library name
230
+ library_name = parts[0]
231
+ # Rest is the path within that library
232
+ path_within_library = "/".join(parts[1:]) if len(parts) > 1 else ""
233
+
234
+ self.logger.debug(
235
+ f"Parsed directory '{directory}' → library: '{library_name}', path: '{path_within_library}'"
236
+ )
237
+ if library_name.lower() == "shared documents":
238
+ library_name = "Documents"
239
+
240
+ return library_name, path_within_library
241
+
242
+ async def _resolve_drive(self, library_name: str = None) -> DriveItem:
243
+ """Resolve document library drive using Graph API with dynamic library name."""
244
+ if self._drive_info and not library_name:
245
+ return self._drive_info
246
+
247
+ try:
248
+ site_info = await self._resolve_site()
249
+ drives = await self.graph_client.sites.by_site_id(site_info.id).drives.get()
250
+ if drives and drives.value:
251
+ self.logger.debug(
252
+ f"Available libraries: {[d.name for d in drives.value]}"
253
+ )
254
+
255
+ # If library_name specified, try to find it
256
+ if library_name:
257
+ for drive in drives.value:
258
+ if drive.name.lower() == library_name.lower(): # Case insensitive match
259
+ self.logger.info(f"Found library: {drive.name}")
260
+ # Don't cache if we're doing a specific lookup
261
+ return drive
262
+
263
+ # Library not found by name, log available options
264
+ available_names = [d.name for d in drives.value]
265
+ self.logger.warning(
266
+ f"Library '{library_name}' not found. Available: {available_names}"
267
+ )
268
+
269
+ # Try common name mappings
270
+ if library_name.lower() == "shared documents":
271
+ for drive in drives.value:
272
+ if drive.name.lower() in ["documents", "shared documents"]:
273
+ self.logger.info(f"Using '{drive.name}' for 'Shared Documents'")
274
+ return drive
275
+
276
+ raise RuntimeError(
277
+ f"Library '{library_name}' not found. Available: {available_names}"
278
+ )
279
+
280
+ # No specific library requested, use cached or default
281
+ if self._drive_info:
282
+ return self._drive_info
283
+
284
+ # Default to first drive and cache it
285
+ default_drive = drives.value[0]
286
+ self._drive_id = default_drive.id
287
+ self._drive_info = default_drive
288
+ self.logger.info(f"Using default library: {default_drive.name}")
289
+ return default_drive
290
+
291
+ raise RuntimeError(
292
+ f"No document libraries found in site: {site_info.display_name}"
293
+ )
294
+
295
+ except Exception as e:
296
+ raise RuntimeError(f"Failed to resolve document library: {e}") from e
297
+
298
+ async def _ensure_folder(self, folder_path: str, create: bool = True, drive_id: str = None) -> DriveItem:
299
+ """Ensure folder exists using Graph API, optionally in a specific library."""
300
+
301
+ # If no drive_id specified, get the default drive
302
+ if not drive_id:
303
+ drive_info = await self._resolve_drive()
304
+ drive_id = drive_info.id
305
+
306
+ folder_path = (folder_path or "").strip("/")
307
+ if not folder_path:
308
+ # Return root folder of the specified drive
309
+ root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
310
+ return root
311
+
312
+ # Try to resolve existing folder
313
+ try:
314
+ folder_item = await self.graph_client.drives.by_drive_id(drive_id)\
315
+ .items.by_drive_item_id(f"root:/{folder_path}:").get()
316
+ if folder_item:
317
+ return folder_item
318
+ except Exception:
319
+ if not create:
320
+ raise
321
+
322
+ # Create folder recursively
323
+ root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
324
+ parent_id = root.id
325
+
326
+ for segment in [s for s in folder_path.split("/") if s]:
327
+ # Check if segment already exists
328
+ children = await self.graph_client.drives.by_drive_id(drive_id)\
329
+ .items.by_drive_item_id(parent_id).children.get()
330
+
331
+ existing_folder = None
332
+ if children and children.value:
333
+ for child in children.value:
334
+ if child.name == segment and child.folder:
335
+ existing_folder = child
336
+ break
337
+
338
+ if existing_folder:
339
+ parent_id = existing_folder.id
340
+ continue
341
+
342
+ # Create new folder
343
+ new_folder = DriveItem()
344
+ new_folder.name = segment
345
+ new_folder.folder = Folder()
346
+ new_folder.additional_data = {
347
+ "@microsoft.graph.conflictBehavior": "replace"
348
+ }
349
+
350
+ created = await self.graph_client.drives.by_drive_id(drive_id)\
351
+ .items.by_drive_item_id(parent_id).children.post(new_folder)
352
+ parent_id = created.id
353
+ self.logger.info(f"Created folder: {segment}")
354
+
355
+ # Return the final folder
356
+ return await self.graph_client.drives.by_drive_id(drive_id)\
357
+ .items.by_drive_item_id(parent_id).get()
358
+
359
+ async def _build_full_path(self, drive_id: str, parent_id: str, filename: str) -> str:
360
+ """Return path relative to drive root: e.g. 'Shared Documents/Sub/Folder/output.pptx'."""
361
+ parent = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(parent_id).get()
362
+ base = (parent.parent_reference.path or "") # e.g. '/drives/{driveId}/root:/Shared Documents/Sub/Folder'
363
+ # Strip the "/drives/{id}/root:" prefix to make it drive-root relative
364
+ marker = "/root:"
365
+ idx = base.find(marker)
366
+ if idx != -1:
367
+ base = base[idx + len(marker):]
368
+ base = base.strip("/")
369
+ # Path already points to the parent folder itself, so just append the filename
370
+ return f"{base}/{filename}".strip("/")
371
+
372
async def _upload_small_file(self, drive_id, parent_id, local_path, target_name):
    """
    Upload a file in a single request to ``<parent>:/<target_name>:/content``.

    The whole file is read into memory first, so this is meant for files
    below the small-file threshold.

    Raises:
        RuntimeError: if reading the file or the upload request fails.
    """
    try:
        async with aiofiles.open(local_path, "rb") as handle:
            payload = await handle.read()

        # Percent-encode the name so special characters survive in the item
        # address: "<parentId>:/<name>:" is the path-relative item reference.
        # No conflict-behavior query parameter is passed; the SDK call is made
        # on the plain content endpoint.
        item_ref = f"{parent_id}:/{quote(target_name)}:"

        drive = self.graph_client.drives.by_drive_id(drive_id)
        return await drive.items.by_drive_item_id(item_ref).content.put(payload)  # noqa
    except Exception as e:
        raise RuntimeError(f"Small file upload failed for {target_name}: {e}") from e
389
+
390
async def _create_upload_session(self, drive_id: str, parent_id: str, target_name: str) -> UploadSession:
    """
    Open a resumable upload session for ``target_name`` under a parent folder.

    The session is requested with conflict behavior "replace". The file name
    is carried in the item address, not in the request body.

    Raises:
        RuntimeError: if the session could not be created.
    """
    try:
        request_body = CreateUploadSessionPostRequestBody()
        properties = DriveItemUploadableProperties()
        # no "name" here; filename is in the URL
        properties.additional_data = {"@microsoft.graph.conflictBehavior": "replace"}
        request_body.item = properties

        # Percent-encode the name to handle special characters.
        # POST /drives/{driveId}/items/{parentId}:/{fileName}:/createUploadSession
        item_ref = f"{parent_id}:/{quote(target_name)}:/"
        target = self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(item_ref)
        return await target.create_upload_session.post(request_body)

    except Exception as e:
        raise RuntimeError(f"Upload session creation failed for {target_name}: {e}") from e
407
+
408
async def _upload_large_file(
    self,
    upload_session: UploadSession,
    local_path: Union[str, Path]
) -> DriveItem:
    """
    Upload a file chunk by chunk through a resumable upload session.

    Chunks of ``self.chunk_size`` bytes are PUT to the session's upload URL
    with a matching Content-Range header. A 202 response means "send more";
    200/201 carries the final item.

    Args:
        upload_session: Session whose ``upload_url`` receives the chunks.
        local_path: File to upload.

    Returns:
        A DriveItem filled from the final response (id, name, size, web_url);
        the raw JSON is kept in ``additional_data``.

    Raises:
        RuntimeError: on an unexpected status code, or if the file is
            exhausted without a final 200/201 response.
    """
    file_size = os.path.getsize(local_path)
    uploaded = 0
    async with aiohttp.ClientSession() as session:
        async with aiofiles.open(local_path, "rb") as f:
            with tqdm(total=file_size, unit='B', unit_scale=True, desc=f'Uploading {Path(local_path).name}') as pbar:  # noqa
                while uploaded < file_size:
                    chunk = await f.read(self.chunk_size)
                    if not chunk:
                        break

                    start = uploaded
                    end = uploaded + len(chunk) - 1

                    headers = {
                        "Content-Length": str(len(chunk)),
                        "Content-Range": f"bytes {start}-{end}/{file_size}"
                    }

                    async with session.put(
                        upload_session.upload_url,
                        headers=headers,
                        data=chunk
                    ) as response:
                        if response.status in (200, 201):
                            # Upload complete
                            pbar.update(file_size - uploaded)
                            result_data = await response.json()

                            # Convert to DriveItem (simplified). The id is
                            # copied too so callers can address the new item.
                            drive_item = DriveItem()
                            drive_item.id = result_data.get('id')
                            drive_item.name = result_data.get('name')
                            drive_item.size = result_data.get('size')
                            drive_item.web_url = result_data.get('webUrl')
                            drive_item.additional_data = result_data

                            return drive_item

                        if response.status != 202:
                            error_text = await response.text()
                            raise RuntimeError(
                                f"Chunk upload failed: {response.status} {error_text}"
                            )

                        # 202: chunk accepted, continue uploading
                        uploaded = end + 1
                        pbar.update(len(chunk))

                        # Honour Retry-After when it is a number of seconds;
                        # any other form is ignored instead of aborting the
                        # upload with a ValueError.
                        if (retry_after := response.headers.get('Retry-After')):
                            try:
                                delay = float(retry_after)
                            except ValueError:
                                delay = 0.0
                            if delay > 0:
                                await asyncio.sleep(delay)

    raise RuntimeError(
        "Upload session completed without final item response"
    )
471
+
472
+ def _normalize_directory(self, directory: str, drive_info) -> str:
473
+ """
474
+ Normalize a SharePoint directory path so it is **relative to the drive root**.
475
+
476
+ Accepts inputs like:
477
+ - "Project Management/Epson/Store and Product MSL"
478
+ - "Documents/Project Management/Epson/Store and Product MSL"
479
+ - "Shared Documents/Project Management/..."
480
+ - "sites/<site>/Shared Documents/Project Management/..."
481
+ - "troc/Documents/Project Management/..." # stray tenant/site prefix
482
+ - "/drives/<id>/root:/Project Management/... # SDK-style path"
483
+ - "/drive/root:/Project Management/..."
484
+
485
+ Returns a drive-root relative string such as:
486
+ "Project Management/Epson/Store and Product MSL" (or "" for library root)
487
+ """
488
+ if not directory:
489
+ return ""
490
+
491
+ p = directory.replace("\\", "/").strip()
492
+ if not p:
493
+ return ""
494
+
495
+ self.logger.debug(f"Normalizing directory: '{directory}' -> initial clean: '{p}'")
496
+
497
+ # If path contains a /root: prefix, strip everything up to it.
498
+ if "/root:" in p:
499
+ p = p.split("/root:", 1)[1]
500
+
501
+ # Strip any leading "root:" marker and leading/trailing slashes
502
+ p = p.lstrip("root:").strip("/")
503
+
504
+ # Split into parts and work in lower for comparisons
505
+ parts = [seg for seg in p.split("/") if seg]
506
+ lower = [seg.lower() for seg in parts]
507
+
508
+ # Helper: return remainder after a given index
509
+ def after(idx: int) -> str:
510
+ return "/".join(parts[idx + 1:])
511
+
512
+ # 1) If the path contains a document library segment, keep only what's after it
513
+ for i, seg in enumerate(lower):
514
+ if seg in ("shared documents", "documents"):
515
+ normalized = after(i)
516
+ self.logger.debug(f"Removed library segment '{parts[i]}', remaining: '{normalized}'")
517
+ self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
518
+ return normalized
519
+
520
+ # 2) If it starts with 'sites/<sitename>/*', drop those two segments, then
521
+ # drop a leading library name if it immediately follows.
522
+ if len(lower) >= 2 and lower[0] == "sites":
523
+ parts = parts[2:]
524
+ lower = lower[2:]
525
+ if parts:
526
+ if lower and lower[0] in ("shared documents", "documents"):
527
+ parts = parts[1:]
528
+ normalized = "/".join(parts)
529
+ self.logger.debug(f"Removed '/sites/<site>/' prefix, remaining: '{normalized}'")
530
+ self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
531
+ return normalized
532
+
533
+ # 3) If the first segment equals site/tenant name (stray prefix), drop it and retry library removal
534
+ stray_prefixes = set()
535
+ if getattr(self, "site", None):
536
+ stray_prefixes.add(str(self.site).strip("/").lower())
537
+ if getattr(self, "tenant", None):
538
+ stray_prefixes.add(str(self.tenant).strip("/").lower())
539
+
540
+ if lower and lower[0] in stray_prefixes:
541
+ parts = parts[1:]
542
+ lower = lower[1:]
543
+ # If next is a library name, drop it as well
544
+ if lower and lower[0] in ("shared documents", "documents"):
545
+ parts = parts[1:]
546
+ normalized = "/".join(parts)
547
+ self.logger.debug(f"Removed stray site/tenant prefix, remaining: '{normalized}'")
548
+ self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
549
+ return normalized
550
+
551
+ # Otherwise assume it's already drive-root relative
552
+ normalized = "/".join(parts)
553
+ self.logger.debug(f"Final normalized directory: '{directory}' -> '{normalized}'")
554
+ return normalized
555
+
556
+ def _to_colon_id(self, directory: str, name: str) -> str:
557
+ """
558
+ Build a root-based colon id with URL-encoded segments:
559
+ "root:/dir1/dir2/file.ext:"
560
+ """
561
+ dir_clean = "/".join(quote(seg, safe="") for seg in (directory.strip("/").split("/") if directory else []))
562
+ name_enc = quote(name, safe="")
563
+ return f"root:/{dir_clean}/{name_enc}:" if dir_clean else f"root:/{name_enc}:"
564
+
565
async def upload_files(
    self,
    filenames: Optional[List[Union[Path, PurePath, str]]] = None,
    destination: Optional[str] = None,
    destination_filenames: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """
    Upload local files to a SharePoint folder through Microsoft Graph.

    Args:
        filenames: Local files to upload; defaults to ``self._srcfiles``.
        destination: "<library>/<folder path>" target; defaults to
            ``self.directory``.
        destination_filenames: Optional names to store the files under, one
            per entry of ``filenames``.

    Returns:
        One ``{"filename": {name, size, web_url, serverRelativeUrl}}`` dict per
        uploaded file. Local files that do not exist are logged and skipped.

    Raises:
        RuntimeError: if ``destination_filenames`` has the wrong length or an
            upload fails.
    """
    sources = filenames or getattr(self, '_srcfiles', [])
    requested_dir = destination or getattr(self, 'directory', '')

    # Validate destination names (if provided)
    if destination_filenames is not None and len(destination_filenames) != len(sources):
        raise RuntimeError(
            "destination_filenames length must match filenames length"
        )

    # Split the destination into library and folder path.
    library_name, path_within_library = self._parse_directory_path(requested_dir)

    try:
        drive_info = await self._resolve_drive(library_name)
        self.logger.debug(f"Using library: {drive_info.name} (ID: {drive_info.id})")
    except Exception as e:
        self.logger.error(f"Failed to access library '{library_name}': {e}")
        # Fall back to default library
        drive_info = await self._resolve_drive()

    # Make sure the folder exists inside the chosen library.
    folder_info = await self._ensure_folder(path_within_library, create=True, drive_id=drive_info.id)

    target_folder = path_within_library or '/'
    results: List[Dict[str, Any]] = []

    for idx, source in enumerate(sources):
        file_path = Path(source)

        if not file_path.exists():
            self.logger.error(f"❌ File not found: {file_path}")
            continue

        # Name the file gets in SharePoint (allows renaming on upload).
        if destination_filenames:
            target_name = destination_filenames[idx]
        else:
            target_name = file_path.name

        try:
            file_size = file_path.stat().st_size
            self.logger.notice(
                f"Uploading {file_path.name} → {target_name} "
                f"to '{target_folder}' ({file_size:,} bytes)"
            )

            if file_size > self.small_file_threshold:
                # Large file: resumable upload session
                upload_session = await self._create_upload_session(
                    drive_info.id,
                    folder_info.id,
                    target_name
                )
                result = await self._upload_large_file(upload_session, file_path)
            else:
                # Small file: single request
                result = await self._upload_small_file(
                    drive_info.id,
                    folder_info.id,
                    file_path,
                    target_name,
                )

            self.logger.info(f"Uploaded successfully: {result.name}")
            # Path including subfolders and the (possibly renamed) file
            server_relative_path = await self._build_full_path(
                drive_info.id, folder_info.id, target_name
            )
            if hasattr(result, 'web_url') and result.web_url:
                self.logger.info(f"SharePoint URL: {result.web_url}")

            # Backwards compatibility format
            entry = {
                "name": result.name,
                "size": getattr(result, 'size', file_size),
                "web_url": getattr(result, 'web_url', ''),
                "serverRelativeUrl": f"/{server_relative_path}",
            }
            results.append({"filename": entry})

        except Exception as e:
            self.logger.error(f"Upload failed for {target_name}: {e}")
            raise RuntimeError(f"Upload failed for {target_name}: {e}") from e

    return results
664
+
665
+ async def test_permissions(self) -> Dict[str, Any]:
666
+ """
667
+ Test SharePoint permissions using Microsoft Graph API.
668
+
669
+ This replaces the old office365-rest-python-client permission test.
670
+ """
671
+ results = {
672
+ "site_access": False,
673
+ "folder_access": False,
674
+ "upload_access": False,
675
+ "errors": []
676
+ }
677
+
678
+ try:
679
+ # Test 1: Site access
680
+ site_info = await self._resolve_site()
681
+ results["site_access"] = True
682
+ self.logger.info(f"Site access: {site_info.display_name}")
683
+
684
+ # Test 2: Drive access
685
+ drive_info = await self._resolve_drive()
686
+ results["folder_access"] = True
687
+ self.logger.info(f"Drive access: {drive_info.name}")
688
+
689
+ # Test 3: Folder creation (upload capability test)
690
+ test_folder = await self._ensure_folder("test-folder-permissions", create=True)
691
+ results["upload_access"] = True
692
+ self.logger.info("Upload permissions confirmed")
693
+
694
+ # Clean up test folder
695
+ with contextlib.suppress(Exception):
696
+ await self.graph_client.drives.by_drive_id(
697
+ drive_info.id
698
+ ).items.by_drive_item_id(test_folder.id).delete()
699
+ self.logger.info("Test folder cleaned up")
700
+
701
+ except Exception as e:
702
+ results["errors"].append(str(e))
703
+ self.logger.error(f"Permission test failed: {e}")
704
+
705
+ return results
706
+
707
async def upload_folder(
    self,
    local_folder: PurePath,
    destination: str = None,
    destination_filenames: Optional[List[str]] = None,
):
    """
    Upload every file under a local folder to SharePoint, mirroring sub-folders.

    Args:
        local_folder: Local folder to upload; searched recursively.
        destination: SharePoint destination folder. Defaults to
            ``self.directory`` and, if that attribute is missing, to
            "Shared Documents".
        destination_filenames: Optional target names, matched by position
            against the local files **sorted by path**, so the pairing does
            not depend on filesystem listing order.

    Returns:
        List of upload results, as returned by ``upload_files``. A file whose
        upload fails is logged and skipped; the remaining files still upload.

    Raises:
        RuntimeError: if the local folder is missing or is not a directory.
    """
    try:
        local_path = Path(local_folder)
        if not local_path.exists() or not local_path.is_dir():
            raise FileNotFoundError(
                f"Local folder does not exist or is not a directory: {local_folder}"
            )

        # All files below the folder, in a deterministic order.
        all_files = sorted(
            file_path
            for file_path in local_path.rglob("*")
            if file_path.is_file()
        )

        if not all_files:
            self.logger.warning(
                f"No files found in folder: {local_folder}"
            )
            return []

        self.logger.debug(
            f"Uploading folder with {len(all_files)} files from {local_folder}"
        )

        results = []
        target_folder = destination or getattr(self, 'directory', 'Shared Documents')

        for idx, file_path in enumerate(all_files):
            # Position of the file relative to the folder being uploaded
            relative_path = file_path.relative_to(local_path)

            # Files in a sub-directory go to the same sub-directory remotely
            if relative_path.parent != Path('.'):
                file_target_folder = f"{target_folder}/{relative_path.parent}".replace("\\", "/")
            else:
                file_target_folder = target_folder

            try:
                self.logger.debug(
                    f"Uploading {relative_path} to {file_target_folder}"
                )

                # Reuse upload_files, one file at a time, so each file can
                # target its own folder.
                file_result = await self.upload_files(
                    filenames=[file_path],
                    destination=file_target_folder,
                    destination_filenames=[destination_filenames[idx]] if destination_filenames else None
                )
                results.extend(file_result)

            except Exception as e:
                self.logger.error(f"Failed to upload {relative_path}: {e}")
                # Continue with other files even if one fails
                continue

        self.logger.info(
            f"Folder upload completed. {len(results)} files uploaded successfully."
        )
        return results

    except Exception as e:
        self.logger.error(f"Folder upload failed: {e}")
        raise RuntimeError(
            f"Folder upload failed: {e}"
        ) from e
791
+
792
async def create_subscription(
    self,
    library_id: str,
    webhook_url: str,
    client_state: str = "secret_string",
    expiration_days: int = 1
) -> dict:
    """
    Create a Graph webhook subscription for a SharePoint list/library.

    Args:
        library_id: Id of the list to watch.
        webhook_url: HTTPS endpoint that receives the notifications.
        client_state: Opaque value echoed back in every notification.
        expiration_days: Lifetime of the subscription, in days from now.

    Returns:
        Dict with the created subscription's ``id``, ``resource``,
        ``notification_url`` and ``expiration_date_time``.

    Raises:
        RuntimeError: if Graph rejects the request or the call fails.
    """
    try:
        # NOTE(review): Graph caps the lifetime per resource type; confirm the
        # limit that applies to SharePoint lists before raising the default.
        expiration_date = datetime.now(timezone.utc) + timedelta(days=expiration_days)

        subscription = Subscription()
        subscription.change_type = "created,updated,deleted"
        subscription.notification_url = webhook_url
        # NOTE(review): the resource is built from self.tenant; Graph list
        # resources are normally addressed by site id — confirm.
        subscription.resource = f"sites/{self.tenant}/lists/{library_id}"
        # Hand over the aware datetime itself. The previous
        # f"{isoformat()}Z" produced "...+00:00Z" (offset AND "Z"), which is
        # not a valid timestamp, and replaced the datetime by a string.
        subscription.expiration_date_time = expiration_date
        subscription.client_state = client_state

        # Create subscription using Graph SDK
        created_subscription = await self.graph_client.subscriptions.post(subscription)

        self.logger.info("✅ Subscription created successfully")
        return {
            "id": created_subscription.id,
            "resource": created_subscription.resource,
            "notification_url": created_subscription.notification_url,
            "expiration_date_time": created_subscription.expiration_date_time
        }

    except Exception as e:
        self.logger.error(f"❌ Failed to create subscription: {e}")
        raise RuntimeError(f"Failed to create subscription: {e}") from e
827
+
828
async def get_library_id(self, absolute_url: str) -> str:
    """
    Return the drive id of the document library addressed by a SharePoint URL.

    The URL path is read as ``/<collection>/<site>/<library>/...`` (for
    example ``/sites/mysite/Shared%20Documents/Forms/AllItems.aspx``).
    Drives are matched against the library segment in this order:

    1. exact name, ignoring case;
    2. name containing the library segment, ignoring case;
    3. the drive named "Documents".

    When the URL names no library at all, the first drive is returned.

    Raises:
        RuntimeError: if the URL has no site path, no drive matches, or a
            Graph call fails.
    """
    from urllib.parse import unquote  # local import: only needed here

    try:
        parsed_url = urlparse(absolute_url)
        path_parts = [part for part in parsed_url.path.strip("/").split("/") if part]
        if len(path_parts) < 2:
            raise ValueError(f"URL has no '<collection>/<site>' path: {absolute_url}")

        # Keep the collection segment ("sites", "teams", ...): Graph addresses
        # a site as "<host>:/sites/<name>", not "<host>:/<name>".
        site_path = "/".join(path_parts[:2])  # e.g., 'sites/mysite'
        # Only the first segment after the site can be a library name; it is
        # URL-decoded so "Shared%20Documents" compares as "Shared Documents".
        library_name = unquote(path_parts[2]) if len(path_parts) > 2 else ""

        site_identifier = f"{self.tenant}.sharepoint.com:/{site_path}"
        site = await self.graph_client.sites.by_site_id(site_identifier).get()

        # Get drives for the site
        drives = await self.graph_client.sites.by_site_id(site.id).drives.get()

        if drives and drives.value:
            wanted = library_name.casefold()

            def _first(predicate):
                # First drive, in Graph's order, whose folded name satisfies predicate.
                return next(
                    (d for d in drives.value if predicate((d.name or "").casefold())),
                    None,
                )

            if wanted:
                # Best match first, so a "Documents" drive listed earlier no
                # longer shadows the library that was actually requested.
                match = (
                    _first(lambda n: n == wanted)
                    or _first(lambda n: wanted in n)
                    or _first(lambda n: n == "documents")
                )
            else:
                match = drives.value[0]

            if match is not None:
                self.logger.info(
                    f"📋 Library ID for {absolute_url} is {match.id}"
                )
                return match.id

        raise RuntimeError("Library not found")

    except Exception as e:
        raise RuntimeError(f"Failed to retrieve library ID: {e}") from e
858
+
859
async def close(self):
    """Close the parent's resources, then forget the cached site and drive."""
    await super().close()
    # Reset the lookup caches so a later call resolves everything again.
    self._site_info = self._drive_info = None
    self._site_id = self._drive_id = None
866
+
867
+ def _pattern_is_api_safe(self, pattern: str) -> bool:
868
+ """
869
+ Return True if 'pattern' can be safely passed to Graph search (no wildcards/regex),
870
+ otherwise False (e.g., contains * ? [ ] { } ( ) ^ $ | \ ).
871
+ """
872
+ return not re.search(r'[*?\[\]\{\}\(\)\^\$|\\]', pattern or "")
873
+
874
+ def _in_dir(self, path_rel_to_drive: str, dir_rel_to_drive: str) -> bool:
875
+ p = (path_rel_to_drive or "").strip("/").lower()
876
+ d = (dir_rel_to_drive or "").strip("/").lower()
877
+ return p.startswith(d) if d else True
878
+
879
async def download_found_files(
    self,
    found: List[Dict[str, Any]],
) -> List[Dict[str, str]]:
    """
    Download the items in ``found`` (from file_search) into local self.directory.

    Files are streamed from the item's pre-authenticated download URL when it
    has one; otherwise the content is fetched through Graph, which loads it
    into memory. If ``self._filenames`` is set and has one entry per item, the
    files are saved under those names; otherwise a warning is logged and the
    original names are kept. A failed download is logged and skipped so the
    remaining items still download.

    Returns: List[{"filename": <local_path>, "download_url": <url or "">}]
    """
    results: List[Dict[str, str]] = []

    # Ensure local destination directory exists
    dest_dir = Path(getattr(self, "directory", ".")).expanduser().resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)

    # Handle desired names
    desired_names = getattr(self, "_filenames", None)
    if desired_names and len(desired_names) != len(found):
        self.logger.warning(
            f"⚠️ Matched files ({len(found)}) != self._filenames ({len(desired_names)}). "
            f"Will keep original names."
        )
        desired_names = None

    def _sanitize(name: str) -> str:
        # make it a safe filename for local FS
        name = Path(name).name  # strip any path
        return re.sub(r'[\\/:*?"<>|]+', "_", name).strip()

    # Default drive, used only when an item does not say which drive it is in
    drive_info = await self._resolve_drive()
    drive_id = drive_info.id

    async with httpx.AsyncClient(follow_redirects=True, timeout=None) as client:
        for idx, entry in enumerate(found):
            item = entry.get("item")
            if not item or not getattr(item, "name", None):
                self.logger.warning("Skipping entry without a valid drive item")
                continue

            # Determine target local filename
            target_name = _sanitize(desired_names[idx]) if desired_names else _sanitize(item.name)
            dest_path = dest_dir / target_name

            # Get pre-authenticated download URL if available
            download_url = ""
            try:
                add = getattr(item, "additional_data", {}) or {}
                download_url = add.get("@microsoft.graph.downloadUrl", "") or ""
            except Exception:
                download_url = ""

            self.logger.debug(
                f"⬇️ Downloading {item.name} → {dest_path.name}"
            )

            try:
                if download_url:
                    # Stream via downloadUrl
                    async with client.stream("GET", download_url) as resp:
                        resp.raise_for_status()
                        async with aiofiles.open(dest_path, "wb") as f:
                            async for chunk in resp.aiter_bytes(1 << 20):  # 1 MiB
                                await f.write(chunk)
                else:
                    # Fallback: GET /content via Graph (loads into memory).
                    # Prefer the drive the item itself reports: search results
                    # can come from a library other than the default drive,
                    # and an item id is only valid within its own drive.
                    parent_ref = getattr(item, "parent_reference", None)
                    item_drive_id = getattr(parent_ref, "drive_id", None) or drive_id
                    data = await self.graph_client.drives.by_drive_id(item_drive_id)\
                        .items.by_drive_item_id(item.id).content.get()
                    async with aiofiles.open(dest_path, "wb") as f:
                        await f.write(data)

                self.logger.debug(
                    f"✅ Saved: {dest_path}"
                )
                results.append(
                    {"filename": str(dest_path), "download_url": download_url}
                )
            except Exception as e:
                self.logger.error(
                    f"❌ Download failed for {item.name}: {e}"
                )
                # Remove a truncated file so it is not mistaken for a
                # complete download; then continue with the remaining items.
                with contextlib.suppress(OSError):
                    dest_path.unlink()

    return results
966
+
967
async def file_search(self) -> List[Dict[str, Any]]:
    """
    Find the files described by ``self._srcfiles``.

    For every spec the Graph search API is tried first, but only when the
    pattern has no wildcard/regex characters; if that yields nothing, the
    target folder is searched recursively instead. A spec whose library
    cannot be resolved is logged and skipped. A mismatch between the number
    of matches and ``self._filenames`` only produces a warning.

    Returns:
        One ``{"item", "path", "server_relative_url"}`` dict per match.

    Raises:
        RuntimeError: if a spec has no directory, a spec matches no file, or
            the search itself fails.
    """
    destinations: List[Dict[str, Any]] = []

    try:
        for spec in getattr(self, "_srcfiles", []):
            directory_raw: str = (spec.get("directory") or "").strip()
            pattern: str = spec.get("pattern") or spec.get("filename") or ""
            extension = (spec.get("extension") or "").strip()
            wanted_ext: Optional[str] = extension.lower().lstrip(".") if extension else None

            if not directory_raw:
                raise RuntimeError("file_search: each spec must include a 'directory'")

            # Split into library and folder path.
            library_name, directory = self._parse_directory_path(directory_raw)

            try:
                drive_info = await self._resolve_drive(library_name)
                drive_id = drive_info.id
                self.logger.debug(f"Using library: {drive_info.name} (ID: {drive_id})")
            except Exception as e:
                self.logger.error(f"Failed to access library '{library_name}': {e}")
                continue

            matches: List[Dict[str, Any]] = []

            # The API search only runs for plain patterns; the term is reduced
            # to letters, digits, dot, underscore and dash.
            search_term = ""
            if pattern and self._pattern_is_api_safe(pattern):
                search_term = re.sub(r"[^A-Za-z0-9._-]+", " ", pattern).strip()

            if len(search_term) >= 2:
                try:
                    self.logger.debug(f"Attempting API search with term: '{search_term}'")
                    api_res = await self.graph_client.drives.by_drive_id(drive_id).search_with_q(search_term).get()  # noqa
                    hits = api_res.value if api_res else None
                    if hits:
                        self.logger.debug(f"API search returned {len(hits)} result(s)")
                        for item in hits:
                            # Only named file items are candidates.
                            if not getattr(item, "file", None) or not item.name:
                                continue
                            name = item.name
                            ext_ok = name.lower().endswith(f".{wanted_ext}") if wanted_ext else True
                            name_ok = self._matches_pattern(name, pattern)
                            item_path = self._get_item_path_from_item(item)  # drive-root relative

                            self.logger.info(
                                f"🔎 [API] {item_path} "
                                f"(ext: {'✓' if ext_ok else '×'}, name: {'✓' if name_ok else '×'})"
                            )

                            if not (ext_ok and name_ok and self._in_dir(item_path, directory)):
                                continue
                            matches.append({
                                "item": item,
                                "path": item_path,
                                "server_relative_url": item_path
                            })
                            self.logger.info(f"✅ [API] Match: {item_path}")
                    else:
                        self.logger.debug("API search returned no results")
                except Exception as api_err:
                    self.logger.warning(f"API search failed: {api_err}")

            # Recursive fallback when the API was skipped or found nothing.
            if not matches:
                self.logger.info(
                    f"No API results or pattern not API-safe. Recursive in '{directory or '/'}'..."
                )
                matches = await self._search_pattern_recursive(
                    drive_id=drive_id,
                    directory=directory,
                    pattern=pattern,
                    wanted_ext=wanted_ext
                )

            if not matches:
                err = f"No files found for '{pattern or '<empty>'}' in '{directory_raw}'"
                self.logger.error(err)
                raise RuntimeError(err)

            destinations.extend(matches)
            self.logger.info(
                f"=== Found {len(matches)} file(s) for '{pattern or '<empty>'}' ==="
            )

        # Warn (do not fail) on name-count mismatch
        desired = getattr(self, "_filenames", None)
        if desired and len(desired) != len(destinations):
            self.logger.warning(
                f"⚠️ Matched files ({len(destinations)}) != self._filenames ({len(desired)}). "
                f"Downloads will keep original names where needed."
            )

        return destinations

    except Exception as e:
        self.logger.error(f"File search failed: {e}")
        raise RuntimeError(f"File search failed: {e}") from e
1072
+
1073
+ def _matches_pattern(self, filename: str, pattern: str) -> bool:
1074
+ """
1075
+ Check if filename matches the search pattern with detailed logging.
1076
+ """
1077
+ try:
1078
+ # Convert shell-style wildcards to regex if needed
1079
+ if '*' in pattern and '.*' not in pattern:
1080
+ # Simple wildcard pattern like "mkdocs*.yml"
1081
+ regex_pattern = pattern.replace("*", ".*").replace("?", ".")
1082
+ # Add anchors for exact matching
1083
+ regex_pattern = f"^{regex_pattern}$"
1084
+ elif '.*' in pattern or '[' in pattern or '^' in pattern or '$' in pattern:
1085
+ # Already a regex pattern
1086
+ regex_pattern = pattern
1087
+ else:
1088
+ # Exact match
1089
+ regex_pattern = f"^{re.escape(pattern)}$"
1090
+
1091
+ # self.logger.debug(f"Pattern matching: '{filename}' against regex '{regex_pattern}'")
1092
+ return bool(re.match(regex_pattern, filename, re.IGNORECASE))
1093
+
1094
+ except re.error as e:
1095
+ self.logger.warning(
1096
+ f"Regex pattern '{pattern}' failed: {e}, falling back to substring match"
1097
+ )
1098
+ # Remove regex characters and do substring match
1099
+ clean_pattern = re.sub(r'[.*+?^${}()|[\]\\]', '', pattern)
1100
+ result = clean_pattern.lower() in filename.lower()
1101
+ self.logger.debug(f"Substring match result: {result}")
1102
+ return result
1103
+
1104
+ def _get_item_path_from_item(self, item) -> str:
1105
+ """
1106
+ Extract the full path from a DriveItem object.
1107
+ """
1108
+ try:
1109
+ # Try to get path from parent_reference
1110
+ if hasattr(item, 'parent_reference') and item.parent_reference and item.parent_reference.path:
1111
+ parent_path = item.parent_reference.path or ""
1112
+
1113
+ # Clean up the parent path
1114
+ if parent_path.startswith("/drive/root:"):
1115
+ parent_path = parent_path[12:]
1116
+ elif parent_path.startswith("/drives/") and "/root:" in parent_path:
1117
+ parent_path = parent_path.split("/root:")[-1]
1118
+
1119
+ # Build the full path
1120
+ if parent_path:
1121
+ full_path = f"{parent_path}/{item.name}".replace("//", "/").lstrip("/")
1122
+ else:
1123
+ full_path = item.name or ""
1124
+
1125
+ return full_path
1126
+
1127
+ # Fallback: try to get path from web_url if available
1128
+ if hasattr(item, 'web_url') and item.web_url:
1129
+ with contextlib.suppress(Exception):
1130
+ web_url = item.web_url
1131
+
1132
+ # Look for the document library part in the URL
1133
+ if "/Shared%20Documents/" in web_url:
1134
+ path_part = web_url.split("/Shared%20Documents/", 1)[1]
1135
+ return unquote(path_part)
1136
+ elif "/Shared Documents/" in web_url:
1137
+ path_part = web_url.split("/Shared Documents/", 1)[1]
1138
+ return unquote(path_part)
1139
+
1140
+ # Final fallback: just return the filename
1141
+ return item.name or ""
1142
+
1143
+ except Exception as e:
1144
+ self.logger.debug(f"Error extracting path from item: {e}")
1145
+ return item.name or ""
1146
+
1147
+ def _is_in_target_directory(self, file_path: str, target_directory: str) -> bool:
1148
+ """
1149
+ Check if a file path is within the target directory.
1150
+ """
1151
+ # Normalize paths
1152
+ file_path = file_path.strip().strip("/")
1153
+ target_directory = target_directory.strip().strip("/")
1154
+
1155
+ # Remove "Shared Documents" prefix if present in target but not in file path
1156
+ if target_directory.startswith("Shared Documents/"):
1157
+ target_dir_without_prefix = target_directory[17:] # Remove "Shared Documents/"
1158
+ if target_dir_without_prefix in file_path:
1159
+ return True
1160
+
1161
+ # Direct directory match
1162
+ if target_directory in file_path:
1163
+ return True
1164
+
1165
+ # Check if file is in subdirectory
1166
+ file_dir = "/".join(file_path.split("/")[:-1]) # Remove filename
1167
+ return file_dir == target_directory or file_dir.endswith(f"/{target_directory}")
1168
+
1169
async def _search_pattern_recursive(
    self,
    drive_id: str,
    directory: str,
    pattern: str,
    wanted_ext: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Depth-first search for matching files, starting at the target folder.

    Every file below ``directory`` is tested against ``wanted_ext`` (when
    given) and ``pattern`` (via ``self._matches_pattern``). Sub-folders are
    descended into as they are encountered. All pages of each folder
    listing are read, and the path of an entry is only resolved (one extra
    request through ``self._get_item_full_path``) once it has matched.

    Args:
        drive_id: ID of the drive to search.
        directory: Folder to start from; empty means the drive root.
        pattern: Name pattern understood by ``self._matches_pattern``.
        wanted_ext: Optional extension without the leading dot, compared
            case-insensitively.

    Returns:
        One dict per match with keys ``item``, ``path`` and
        ``server_relative_url`` (the last two hold the same path).

    Raises:
        Exception: whatever the Graph client raises when the start folder
            cannot be fetched; it is logged and re-raised so the search
            never silently falls back to walking the drive root.
    """
    matches: List[Dict[str, Any]] = []

    # Resolve the starting folder using colon-path id (no item_with_path)
    start_path = (directory or "").strip().strip("/")
    if start_path:
        try:
            start_folder = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(f"root:/{start_path}:/").get()
            self.logger.notice(
                f"📂 Recursive start: '{start_path}' (ID: {start_folder.id})"
            )
        except Exception as e:
            self.logger.error(f"❌ Start folder not found for '{start_path}': {e}")
            raise
    else:
        start_folder = await self.graph_client.drives.by_drive_id(drive_id).root.get()
        self.logger.notice("📂 Recursive start: drive root")

    matcher = self._matches_pattern
    # Names are lower-cased before the comparison, so the suffix must be too.
    suffix = f".{wanted_ext.lower()}" if wanted_ext else None

    async def _list_children(folder_id: str) -> list:
        """Return all children of a folder, following result paging."""
        builder = self.graph_client.drives.by_drive_id(drive_id)\
            .items.by_drive_item_id(folder_id).children
        entries: list = []
        page = await builder.get()
        while page is not None:
            entries.extend(page.value or [])
            next_link = getattr(page, "odata_next_link", None)
            if not next_link:
                break
            page = await builder.with_url(next_link).get()
        return entries

    async def _dfs(folder_id: str, base_rel_path: str):
        for entry in await _list_children(folder_id):
            if getattr(entry, "folder", None):
                sub_rel = f"{base_rel_path}/{entry.name}".strip("/")
                await _dfs(entry.id, sub_rel)
                continue
            if not getattr(entry, "file", None) or not entry.name:
                continue

            name = entry.name.strip()
            if suffix and not name.lower().endswith(suffix):
                continue
            if not matcher(name, pattern):
                continue

            # Resolving the path costs a request, so do it for matches only.
            full_path = await self._get_item_full_path(drive_id, entry.id)
            matches.append({
                "item": entry,
                "path": full_path,
                "server_relative_url": full_path
            })
            self.logger.info(f"✅ Match: {full_path}")

    await _dfs(start_folder.id, start_path)
    return matches
1245
+
1246
async def _get_item_full_path(self, drive_id: str, item_id: str) -> str:
    """
    Fetch an item and return its path relative to the drive root.

    The path is built from ``parent_reference.path`` plus the item name.
    Both Graph prefix forms are removed from the parent path:
    ``/drive/root:`` and ``/drives/<id>/root:``.

    Args:
        drive_id: ID of the drive holding the item.
        item_id: ID of the item to resolve.

    Returns:
        The path without a leading slash; only the item name when the
        item has no parent reference; an empty string when the lookup
        fails (the failure is logged as a warning, not raised).
    """
    try:
        item = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(item_id).get()

        name = item.name or ""
        parent_ref = getattr(item, 'parent_reference', None)
        if not parent_ref:
            return name

        parent_path = parent_ref.path or ""
        if parent_path.startswith("/drive/root:"):
            parent_path = parent_path[len("/drive/root:"):]
        elif parent_path.startswith("/drives/") and "/root:" in parent_path:
            # Items fetched through /drives/{id} report this longer prefix.
            parent_path = parent_path.split("/root:", 1)[1]

        return f"{parent_path}/{name}".replace("//", "/").lstrip("/")

    except Exception as e:
        self.logger.warning(f"Could not get full path for item {item_id}: {e}")
        return ""
1266
+
1267
async def _resolve_existing_directory(
    self,
    drive_id: str,
    directory_raw: str,
    drive_info,
):
    """
    Find the folder item that ``directory_raw`` refers to inside the drive.

    The directory is normalized with ``self._normalize_directory``. The
    normalized path is tried first; if it contains a ``/``, the path with
    its first segment removed is tried next. An empty normalized path
    means the drive root.

    Returns:
        A ``(folder_item, directory)`` tuple, where ``directory`` is the
        candidate path that resolved to a folder.

    Raises:
        RuntimeError: when no candidate resolves to a folder; chained to
            the last lookup error, if any.
    """
    directory_raw = (directory_raw or "").strip()
    dir_norm = self._normalize_directory(directory_raw, drive_info)

    if dir_norm:
        candidates = [dir_norm]
        _, slash, remainder = dir_norm.partition("/")
        if slash:
            # Second attempt: same path without its leading segment.
            candidates.append(remainder)
    else:
        candidates = [""]

    last_err = None
    for cand in candidates:
        if cand:
            colon_id = f"root:/{cand}:/".replace("//", "/")
        else:
            colon_id = "root:/"
        try:
            items = self.graph_client.drives.by_drive_id(drive_id).items
            folder = await items.by_drive_item_id(colon_id).get()
            if getattr(folder, "folder", None):
                self.logger.debug(
                    f"Resolved directory '{directory_raw}' -> '{cand or '/'}' (ID: {folder.id})"
                )
                return folder, cand
        except Exception as e:
            last_err = e
            self.logger.debug(
                f"Directory candidate not found '{cand or '/'}': {e}"
            )

    raise RuntimeError(
        f"Start directory not found: '{directory_raw}' (normalized '{dir_norm}')"
    ) from last_err
1309
+
1310
async def file_lookup(
    self,
    files: Optional[List[Dict[str, str]]] = None,
) -> List[Dict[str, Any]]:
    """
    Resolve exact files (no search) into 'destinations' items.

    Each spec is a dict with a ``filename`` and an optional ``directory``.
    The directory is split by ``self._parse_directory_path`` into a library
    name and a path inside that library. The file is first fetched directly
    by its colon path; if that fails, the containing folder is listed and
    the file is matched by exact name.

    Specs that cannot be resolved are logged and skipped; they do not
    abort the lookup.

    Args:
        files: Specs to resolve. Defaults to ``self._srcfiles``.

    Returns:
        One dict per resolved file with keys ``item``, ``path`` and
        ``server_relative_url`` (the last two hold the same path), or
        ``None`` when nothing could be resolved.

    Raises:
        RuntimeError: when there are no specs at all.
    """
    specs = files if files is not None else getattr(self, "_srcfiles", [])
    if not specs:
        raise RuntimeError("file_lookup: no files provided and self._srcfiles is empty")

    # Resolved up front so a problem with the default drive surfaces before
    # any per-file work; each spec re-resolves its own library below.
    drive_info = await self._resolve_drive()
    drive_id = drive_info.id

    destinations: List[Dict[str, Any]] = []

    for spec in specs:
        directory_raw: str = (spec.get("directory") or "").strip()
        filename_raw: str = (spec.get("filename") or "").strip()
        if not filename_raw:
            self.logger.warning("file_lookup: skipping entry without 'filename'")
            continue

        # Parse the directory to extract library and path
        library_name, path_within_library = self._parse_directory_path(directory_raw)

        self.logger.notice(
            f"Looking up file: '{filename_raw}' in library '{library_name}', path '{path_within_library}'"
        )

        # Get the specific library
        try:
            drive_info = await self._resolve_drive(library_name)
            drive_id = drive_info.id
        except Exception as e:
            self.logger.error(f"Failed to access library '{library_name}': {e}")
            continue

        # Build the Microsoft Graph API path within the library
        if path_within_library:
            colon_id = f"root:/{path_within_library}/{filename_raw}:"
        else:
            colon_id = f"root:/{filename_raw}:"

        self.logger.debug(
            f"Using Graph API item ID: '{colon_id}' in library '{library_name}'"
        )
        # First try a direct fetch by the file's colon path.
        try:
            file_item = await self.graph_client.drives.by_drive_id(drive_id)\
                .items.by_drive_item_id(colon_id).get()
        except Exception as e:
            self.logger.error(
                f"Direct lookup failed for '{filename_raw}' in library '{library_name}', path '{path_within_library}', error: {e}"
            )

            # Fallback: list children and match by exact name
            try:
                if path_within_library:
                    # Get the directory first
                    dir_colon_id = f"root:/{path_within_library}:"
                    folder = await self.graph_client.drives.by_drive_id(drive_id)\
                        .items.by_drive_item_id(dir_colon_id).get()
                else:
                    # Use root of this library
                    folder = await self.graph_client.drives.by_drive_id(drive_id).root.get()

                # List directory contents
                children = await self.graph_client.drives.by_drive_id(drive_id)\
                    .items.by_drive_item_id(folder.id).children.get()

                found_item = None
                if children and children.value:
                    self.logger.debug(
                        f"Directory contains {len(children.value)} items:"
                    )
                    for child in children.value:
                        self.logger.debug(f"  - {child.name} ({'file' if child.file else 'folder'})")
                        if child.file and child.name and child.name == filename_raw:
                            found_item = child
                            break
                if found_item:
                    # Must be bound to file_item: that is the name the code
                    # after this try/except reads.
                    file_item = found_item
                    self.logger.info(f"Found via fallback search: {filename_raw}")
                else:
                    self.logger.error(f"File '{filename_raw}' not found in library '{library_name}', path '{path_within_library or 'root'}'")
                    continue
            except Exception as e2:
                self.logger.error(f"Fallback search failed: {e2}")
                continue

        # Build drive-root-relative path for return/logging
        full_path = await self._get_item_full_path(drive_id, file_item.id)
        destinations.append({
            "item": file_item,
            "path": full_path,
            "server_relative_url": full_path
        })
        self.logger.info(f"✅ Found: {full_path}")

    return destinations or None
1413
+
1414
async def debug_root_structure(self):
    """
    Log the entries found directly under the root of the resolved drive.

    Folders are logged with a trailing slash, everything else as a file.
    Any failure is logged as an error instead of being raised.
    """
    try:
        drive_info = await self._resolve_drive()
        drive_id = drive_info.id

        # Fetch the root item first; its ID is needed to list the children.
        root = await self.graph_client.drives.by_drive_id(drive_id).root.get()
        children = await self.graph_client.drives.by_drive_id(drive_id).items.by_drive_item_id(root.id).children.get()

        entries = children.value if children else None
        if not entries:
            self.logger.error("No items found at root")
            return

        self.logger.notice("=== ROOT STRUCTURE ===")
        for child in entries:
            line = f"📁 {child.name}/" if child.folder else f"📄 {child.name}"
            self.logger.notice(line)

    except Exception as e:
        self.logger.error(f"Debug failed: {e}")