@vodailoc/kilo-kit-mcp 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (582):
  1. package/.mcp/kilo-kit.codex-windows.toml +5 -0
  2. package/LICENSE +190 -190
  3. package/QUICKSTART.md +265 -255
  4. package/README.md +321 -267
  5. package/mcp/README.md +64 -12
  6. package/mcp/dist/formatters.js +142 -1
  7. package/mcp/dist/orchestration-audit.js +20 -0
  8. package/mcp/dist/orchestration-memory.js +258 -0
  9. package/mcp/dist/orchestration-types.js +1 -0
  10. package/mcp/dist/orchestrator.js +222 -0
  11. package/mcp/dist/question-templates.js +249 -0
  12. package/mcp/dist/route-analytics.js +149 -0
  13. package/mcp/dist/router.js +75 -82
  14. package/mcp/dist/routing-policy-data.js +241 -0
  15. package/mcp/dist/routing-policy.js +145 -0
  16. package/mcp/dist/server.js +93 -4
  17. package/mcp/dist/smoke-env.js +18 -0
  18. package/mcp/dist/smoke.js +68 -1
  19. package/mcp/package.json +1 -2
  20. package/package.json +3 -2
  21. package/skills/README.md +647 -647
  22. package/skills/SKILLS_INDEX.md +139 -139
  23. package/skills/ai-media/ai-multimodal/.env.example +97 -97
  24. package/skills/ai-media/ai-multimodal/SKILL.md +357 -357
  25. package/skills/ai-media/ai-multimodal/references/audio-processing.md +373 -373
  26. package/skills/ai-media/ai-multimodal/references/image-generation.md +558 -558
  27. package/skills/ai-media/ai-multimodal/references/video-analysis.md +502 -502
  28. package/skills/ai-media/ai-multimodal/references/vision-understanding.md +483 -483
  29. package/skills/ai-media/ai-multimodal/scripts/document_converter.py +395 -395
  30. package/skills/ai-media/ai-multimodal/scripts/gemini_batch_process.py +480 -480
  31. package/skills/ai-media/ai-multimodal/scripts/media_optimizer.py +506 -506
  32. package/skills/ai-media/ai-multimodal/scripts/requirements.txt +26 -26
  33. package/skills/ai-media/ai-multimodal/scripts/tests/requirements.txt +20 -20
  34. package/skills/ai-media/ai-multimodal/scripts/tests/test_document_converter.py +299 -299
  35. package/skills/ai-media/ai-multimodal/scripts/tests/test_gemini_batch_process.py +362 -362
  36. package/skills/ai-media/ai-multimodal/scripts/tests/test_media_optimizer.py +373 -373
  37. package/skills/ai-media/media-processing/SKILL.md +358 -358
  38. package/skills/ai-media/media-processing/references/ffmpeg-encoding.md +358 -358
  39. package/skills/ai-media/media-processing/references/ffmpeg-filters.md +503 -503
  40. package/skills/ai-media/media-processing/references/ffmpeg-streaming.md +403 -403
  41. package/skills/ai-media/media-processing/references/format-compatibility.md +375 -375
  42. package/skills/ai-media/media-processing/references/imagemagick-batch.md +612 -612
  43. package/skills/ai-media/media-processing/references/imagemagick-editing.md +623 -623
  44. package/skills/ai-media/media-processing/scripts/batch_resize.py +342 -342
  45. package/skills/ai-media/media-processing/scripts/media_convert.py +311 -311
  46. package/skills/ai-media/media-processing/scripts/requirements.txt +24 -24
  47. package/skills/ai-media/media-processing/scripts/tests/requirements.txt +2 -2
  48. package/skills/ai-media/media-processing/scripts/tests/test_batch_resize.py +372 -372
  49. package/skills/ai-media/media-processing/scripts/tests/test_media_convert.py +259 -259
  50. package/skills/ai-media/media-processing/scripts/tests/test_video_optimize.py +397 -397
  51. package/skills/ai-media/media-processing/scripts/video_optimize.py +414 -414
  52. package/skills/ai-media/screenshot/LICENSE.txt +201 -201
  53. package/skills/ai-media/screenshot/SKILL.md +267 -267
  54. package/skills/ai-media/screenshot/agents/openai.yaml +6 -6
  55. package/skills/ai-media/screenshot/assets/screenshot-small.svg +5 -5
  56. package/skills/ai-media/screenshot/scripts/ensure_macos_permissions.sh +54 -54
  57. package/skills/ai-media/screenshot/scripts/macos_display_info.swift +22 -22
  58. package/skills/ai-media/screenshot/scripts/macos_permissions.swift +40 -40
  59. package/skills/ai-media/screenshot/scripts/macos_window_info.swift +126 -126
  60. package/skills/ai-media/screenshot/scripts/take_screenshot.ps1 +163 -163
  61. package/skills/ai-media/screenshot/scripts/take_screenshot.py +585 -585
  62. package/skills/ai-media/sora/LICENSE.txt +201 -201
  63. package/skills/ai-media/sora/SKILL.md +153 -153
  64. package/skills/ai-media/sora/agents/openai.yaml +6 -6
  65. package/skills/ai-media/sora/assets/sora-small.svg +4 -4
  66. package/skills/ai-media/sora/references/cinematic-shots.md +53 -53
  67. package/skills/ai-media/sora/references/cli.md +248 -248
  68. package/skills/ai-media/sora/references/codex-network.md +28 -28
  69. package/skills/ai-media/sora/references/prompting.md +137 -137
  70. package/skills/ai-media/sora/references/sample-prompts.md +95 -95
  71. package/skills/ai-media/sora/references/social-ads.md +42 -42
  72. package/skills/ai-media/sora/references/troubleshooting.md +58 -58
  73. package/skills/ai-media/sora/references/video-api.md +45 -45
  74. package/skills/ai-media/sora/scripts/sora.py +970 -970
  75. package/skills/design/aesthetic/SKILL.md +121 -121
  76. package/skills/design/aesthetic/assets/design-guideline-template.md +163 -163
  77. package/skills/design/aesthetic/assets/design-story-template.md +135 -135
  78. package/skills/design/aesthetic/references/design-principles.md +62 -62
  79. package/skills/design/aesthetic/references/design-resources.md +75 -75
  80. package/skills/design/aesthetic/references/micro-interactions.md +53 -53
  81. package/skills/design/aesthetic/references/storytelling-design.md +50 -50
  82. package/skills/design/figma/LICENSE.txt +202 -202
  83. package/skills/design/figma/SKILL.md +42 -42
  84. package/skills/design/figma/agents/openai.yaml +14 -14
  85. package/skills/design/figma/assets/figma-small.svg +3 -3
  86. package/skills/design/figma/assets/icon.svg +28 -28
  87. package/skills/design/figma/references/figma-mcp-config.md +35 -35
  88. package/skills/design/figma/references/figma-tools-and-prompts.md +34 -34
  89. package/skills/design/figma-implement-design/LICENSE.txt +202 -202
  90. package/skills/design/figma-implement-design/SKILL.md +264 -264
  91. package/skills/design/figma-implement-design/agents/openai.yaml +14 -14
  92. package/skills/design/figma-implement-design/assets/figma-small.svg +3 -3
  93. package/skills/design/figma-implement-design/assets/icon.svg +28 -28
  94. package/skills/design/frontend-design/SKILL.md +41 -41
  95. package/skills/design/frontend-design/references/animejs.md +395 -395
  96. package/skills/design/ui-styling/LICENSE.txt +201 -201
  97. package/skills/design/ui-styling/SKILL.md +321 -321
  98. package/skills/design/ui-styling/canvas-fonts/ArsenalSC-OFL.txt +93 -93
  99. package/skills/design/ui-styling/canvas-fonts/BigShoulders-OFL.txt +93 -93
  100. package/skills/design/ui-styling/canvas-fonts/Boldonse-OFL.txt +93 -93
  101. package/skills/design/ui-styling/canvas-fonts/BricolageGrotesque-OFL.txt +93 -93
  102. package/skills/design/ui-styling/canvas-fonts/CrimsonPro-OFL.txt +93 -93
  103. package/skills/design/ui-styling/canvas-fonts/DMMono-OFL.txt +93 -93
  104. package/skills/design/ui-styling/canvas-fonts/EricaOne-OFL.txt +94 -94
  105. package/skills/design/ui-styling/canvas-fonts/GeistMono-OFL.txt +93 -93
  106. package/skills/design/ui-styling/canvas-fonts/Gloock-OFL.txt +93 -93
  107. package/skills/design/ui-styling/canvas-fonts/IBMPlexMono-OFL.txt +93 -93
  108. package/skills/design/ui-styling/canvas-fonts/InstrumentSans-OFL.txt +93 -93
  109. package/skills/design/ui-styling/canvas-fonts/Italiana-OFL.txt +93 -93
  110. package/skills/design/ui-styling/canvas-fonts/JetBrainsMono-OFL.txt +93 -93
  111. package/skills/design/ui-styling/canvas-fonts/Jura-OFL.txt +93 -93
  112. package/skills/design/ui-styling/canvas-fonts/LibreBaskerville-OFL.txt +93 -93
  113. package/skills/design/ui-styling/canvas-fonts/Lora-OFL.txt +93 -93
  114. package/skills/design/ui-styling/canvas-fonts/NationalPark-OFL.txt +93 -93
  115. package/skills/design/ui-styling/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -93
  116. package/skills/design/ui-styling/canvas-fonts/Outfit-OFL.txt +93 -93
  117. package/skills/design/ui-styling/canvas-fonts/PixelifySans-OFL.txt +93 -93
  118. package/skills/design/ui-styling/canvas-fonts/PoiretOne-OFL.txt +93 -93
  119. package/skills/design/ui-styling/canvas-fonts/RedHatMono-OFL.txt +93 -93
  120. package/skills/design/ui-styling/canvas-fonts/Silkscreen-OFL.txt +93 -93
  121. package/skills/design/ui-styling/canvas-fonts/SmoochSans-OFL.txt +93 -93
  122. package/skills/design/ui-styling/canvas-fonts/Tektur-OFL.txt +93 -93
  123. package/skills/design/ui-styling/canvas-fonts/WorkSans-OFL.txt +93 -93
  124. package/skills/design/ui-styling/canvas-fonts/YoungSerif-OFL.txt +93 -93
  125. package/skills/design/ui-styling/references/canvas-design-system.md +320 -320
  126. package/skills/design/ui-styling/references/shadcn-accessibility.md +471 -471
  127. package/skills/design/ui-styling/references/shadcn-components.md +424 -424
  128. package/skills/design/ui-styling/references/shadcn-theming.md +373 -373
  129. package/skills/design/ui-styling/references/tailwind-customization.md +483 -483
  130. package/skills/design/ui-styling/references/tailwind-responsive.md +382 -382
  131. package/skills/design/ui-styling/references/tailwind-utilities.md +455 -455
  132. package/skills/design/ui-styling/scripts/requirements.txt +17 -17
  133. package/skills/design/ui-styling/scripts/shadcn_add.py +292 -292
  134. package/skills/design/ui-styling/scripts/tailwind_config_gen.py +456 -456
  135. package/skills/design/ui-styling/scripts/tests/requirements.txt +3 -3
  136. package/skills/design/ui-styling/scripts/tests/test_shadcn_add.py +266 -266
  137. package/skills/design/ui-styling/scripts/tests/test_tailwind_config_gen.py +336 -336
  138. package/skills/engineering/aspnet-core/LICENSE.txt +201 -201
  139. package/skills/engineering/aspnet-core/SKILL.md +61 -61
  140. package/skills/engineering/aspnet-core/agents/openai.yaml +5 -5
  141. package/skills/engineering/aspnet-core/references/_sections.md +40 -40
  142. package/skills/engineering/aspnet-core/references/apis-minimal-and-controllers.md +81 -81
  143. package/skills/engineering/aspnet-core/references/data-state-and-services.md +69 -69
  144. package/skills/engineering/aspnet-core/references/program-and-pipeline.md +103 -103
  145. package/skills/engineering/aspnet-core/references/realtime-grpc-and-background-work.md +58 -58
  146. package/skills/engineering/aspnet-core/references/security-and-identity.md +75 -75
  147. package/skills/engineering/aspnet-core/references/source-map.md +43 -43
  148. package/skills/engineering/aspnet-core/references/stack-selection.md +63 -63
  149. package/skills/engineering/aspnet-core/references/testing-performance-and-operations.md +92 -92
  150. package/skills/engineering/aspnet-core/references/ui-blazor.md +53 -53
  151. package/skills/engineering/aspnet-core/references/ui-mvc.md +56 -56
  152. package/skills/engineering/aspnet-core/references/ui-razor-pages.md +55 -55
  153. package/skills/engineering/aspnet-core/references/versioning-and-upgrades.md +51 -51
  154. package/skills/engineering/backend-development/SKILL.md +95 -95
  155. package/skills/engineering/backend-development/references/backend-api-design.md +495 -495
  156. package/skills/engineering/backend-development/references/backend-architecture.md +454 -454
  157. package/skills/engineering/backend-development/references/backend-authentication.md +338 -338
  158. package/skills/engineering/backend-development/references/backend-code-quality.md +659 -659
  159. package/skills/engineering/backend-development/references/backend-debugging.md +904 -904
  160. package/skills/engineering/backend-development/references/backend-devops.md +494 -494
  161. package/skills/engineering/backend-development/references/backend-mindset.md +387 -387
  162. package/skills/engineering/backend-development/references/backend-performance.md +397 -397
  163. package/skills/engineering/backend-development/references/backend-security.md +290 -290
  164. package/skills/engineering/backend-development/references/backend-technologies.md +256 -256
  165. package/skills/engineering/backend-development/references/backend-testing.md +429 -429
  166. package/skills/engineering/better-auth/SKILL.md +204 -204
  167. package/skills/engineering/better-auth/references/advanced-features.md +553 -553
  168. package/skills/engineering/better-auth/references/database-integration.md +577 -577
  169. package/skills/engineering/better-auth/references/email-password-auth.md +416 -416
  170. package/skills/engineering/better-auth/references/oauth-providers.md +430 -430
  171. package/skills/engineering/better-auth/scripts/better_auth_init.py +521 -521
  172. package/skills/engineering/better-auth/scripts/requirements.txt +15 -15
  173. package/skills/engineering/better-auth/scripts/tests/test_better_auth_init.py +421 -421
  174. package/skills/engineering/code-review/SKILL.md +140 -140
  175. package/skills/engineering/code-review/references/code-review-reception.md +208 -208
  176. package/skills/engineering/code-review/references/requesting-code-review.md +104 -104
  177. package/skills/engineering/code-review/references/verification-before-completion.md +138 -138
  178. package/skills/engineering/context-engineering/SKILL.md +86 -86
  179. package/skills/engineering/context-engineering/references/context-compression.md +84 -84
  180. package/skills/engineering/context-engineering/references/context-degradation.md +93 -93
  181. package/skills/engineering/context-engineering/references/context-fundamentals.md +75 -75
  182. package/skills/engineering/context-engineering/references/context-optimization.md +82 -82
  183. package/skills/engineering/context-engineering/references/evaluation.md +89 -89
  184. package/skills/engineering/context-engineering/references/memory-systems.md +88 -88
  185. package/skills/engineering/context-engineering/references/multi-agent-patterns.md +90 -90
  186. package/skills/engineering/context-engineering/references/project-development.md +97 -97
  187. package/skills/engineering/context-engineering/references/tool-design.md +86 -86
  188. package/skills/engineering/context-engineering/scripts/compression_evaluator.py +329 -329
  189. package/skills/engineering/context-engineering/scripts/context_analyzer.py +294 -294
  190. package/skills/engineering/databases/SKILL.md +232 -232
  191. package/skills/engineering/databases/references/mongodb-aggregation.md +447 -447
  192. package/skills/engineering/databases/references/mongodb-atlas.md +465 -465
  193. package/skills/engineering/databases/references/mongodb-crud.md +408 -408
  194. package/skills/engineering/databases/references/mongodb-indexing.md +442 -442
  195. package/skills/engineering/databases/references/postgresql-administration.md +594 -594
  196. package/skills/engineering/databases/references/postgresql-performance.md +527 -527
  197. package/skills/engineering/databases/references/postgresql-psql-cli.md +467 -467
  198. package/skills/engineering/databases/references/postgresql-queries.md +475 -475
  199. package/skills/engineering/databases/scripts/db_backup.py +502 -502
  200. package/skills/engineering/databases/scripts/db_migrate.py +414 -414
  201. package/skills/engineering/databases/scripts/db_performance_check.py +444 -444
  202. package/skills/engineering/databases/scripts/requirements.txt +20 -20
  203. package/skills/engineering/databases/scripts/tests/requirements.txt +4 -4
  204. package/skills/engineering/databases/scripts/tests/test_db_backup.py +340 -340
  205. package/skills/engineering/databases/scripts/tests/test_db_migrate.py +277 -277
  206. package/skills/engineering/databases/scripts/tests/test_db_performance_check.py +370 -370
  207. package/skills/engineering/diagnose/SKILL.md +117 -117
  208. package/skills/engineering/diagnose/scripts/hitl-loop.template.sh +41 -41
  209. package/skills/engineering/docs-seeker/SKILL.md +207 -207
  210. package/skills/engineering/docs-seeker/WORKFLOWS.md +505 -505
  211. package/skills/engineering/docs-seeker/references/best-practices.md +632 -632
  212. package/skills/engineering/docs-seeker/references/documentation-sources.md +461 -461
  213. package/skills/engineering/docs-seeker/references/error-handling.md +621 -621
  214. package/skills/engineering/docs-seeker/references/limitations.md +821 -821
  215. package/skills/engineering/docs-seeker/references/performance.md +574 -574
  216. package/skills/engineering/docs-seeker/references/tool-selection.md +262 -262
  217. package/skills/engineering/frontend-development/SKILL.md +398 -398
  218. package/skills/engineering/frontend-development/resources/common-patterns.md +330 -330
  219. package/skills/engineering/frontend-development/resources/complete-examples.md +871 -871
  220. package/skills/engineering/frontend-development/resources/component-patterns.md +501 -501
  221. package/skills/engineering/frontend-development/resources/data-fetching.md +766 -766
  222. package/skills/engineering/frontend-development/resources/file-organization.md +501 -501
  223. package/skills/engineering/frontend-development/resources/loading-and-error-states.md +500 -500
  224. package/skills/engineering/frontend-development/resources/performance.md +405 -405
  225. package/skills/engineering/frontend-development/resources/routing-guide.md +363 -363
  226. package/skills/engineering/frontend-development/resources/styling-guide.md +427 -427
  227. package/skills/engineering/frontend-development/resources/typescript-standards.md +417 -417
  228. package/skills/engineering/improve-codebase-architecture/DEEPENING.md +37 -37
  229. package/skills/engineering/improve-codebase-architecture/INTERFACE-DESIGN.md +44 -44
  230. package/skills/engineering/improve-codebase-architecture/LANGUAGE.md +53 -53
  231. package/skills/engineering/improve-codebase-architecture/SKILL.md +71 -71
  232. package/skills/engineering/openai-docs/LICENSE.txt +201 -201
  233. package/skills/engineering/openai-docs/SKILL.md +69 -69
  234. package/skills/engineering/openai-docs/agents/openai.yaml +14 -14
  235. package/skills/engineering/openai-docs/assets/openai-small.svg +3 -3
  236. package/skills/engineering/openai-docs/references/gpt-5p4-prompting-guide.md +433 -433
  237. package/skills/engineering/openai-docs/references/latest-model.md +35 -35
  238. package/skills/engineering/openai-docs/references/upgrading-to-gpt-5p4.md +164 -164
  239. package/skills/engineering/playwright/LICENSE.txt +201 -201
  240. package/skills/engineering/playwright/NOTICE.txt +14 -14
  241. package/skills/engineering/playwright/SKILL.md +147 -147
  242. package/skills/engineering/playwright/agents/openai.yaml +6 -6
  243. package/skills/engineering/playwright/assets/playwright-small.svg +3 -3
  244. package/skills/engineering/playwright/references/cli.md +116 -116
  245. package/skills/engineering/playwright/references/workflows.md +95 -95
  246. package/skills/engineering/playwright/scripts/playwright_cli.sh +25 -25
  247. package/skills/engineering/playwright-interactive/LICENSE.txt +201 -201
  248. package/skills/engineering/playwright-interactive/NOTICE.txt +13 -13
  249. package/skills/engineering/playwright-interactive/SKILL.md +689 -689
  250. package/skills/engineering/playwright-interactive/agents/openai.yaml +6 -6
  251. package/skills/engineering/playwright-interactive/assets/playwright-small.svg +3 -3
  252. package/skills/engineering/render-deploy/LICENSE.txt +201 -201
  253. package/skills/engineering/render-deploy/SKILL.md +479 -479
  254. package/skills/engineering/render-deploy/agents/openai.yaml +14 -14
  255. package/skills/engineering/render-deploy/assets/docker.yaml +62 -62
  256. package/skills/engineering/render-deploy/assets/go-api.yaml +35 -35
  257. package/skills/engineering/render-deploy/assets/nextjs-postgres.yaml +35 -35
  258. package/skills/engineering/render-deploy/assets/node-express.yaml +25 -25
  259. package/skills/engineering/render-deploy/assets/python-django.yaml +89 -89
  260. package/skills/engineering/render-deploy/assets/render-small.svg +3 -3
  261. package/skills/engineering/render-deploy/assets/static-site.yaml +54 -54
  262. package/skills/engineering/render-deploy/references/blueprint-spec.md +718 -718
  263. package/skills/engineering/render-deploy/references/codebase-analysis.md +49 -49
  264. package/skills/engineering/render-deploy/references/configuration-guide.md +603 -603
  265. package/skills/engineering/render-deploy/references/deployment-details.md +224 -224
  266. package/skills/engineering/render-deploy/references/direct-creation.md +113 -113
  267. package/skills/engineering/render-deploy/references/error-patterns.md +13 -13
  268. package/skills/engineering/render-deploy/references/post-deploy-checks.md +36 -36
  269. package/skills/engineering/render-deploy/references/runtimes.md +473 -473
  270. package/skills/engineering/render-deploy/references/service-types.md +450 -450
  271. package/skills/engineering/render-deploy/references/troubleshooting-basics.md +36 -36
  272. package/skills/engineering/repomix/SKILL.md +215 -215
  273. package/skills/engineering/repomix/references/configuration.md +211 -211
  274. package/skills/engineering/repomix/references/usage-patterns.md +232 -232
  275. package/skills/engineering/repomix/scripts/README.md +179 -179
  276. package/skills/engineering/repomix/scripts/repomix_batch.py +455 -455
  277. package/skills/engineering/repomix/scripts/repos.example.json +15 -15
  278. package/skills/engineering/repomix/scripts/requirements.txt +15 -15
  279. package/skills/engineering/repomix/scripts/tests/test_repomix_batch.py +531 -531
  280. package/skills/engineering/setup-matt-pocock-skills/SKILL.md +121 -121
  281. package/skills/engineering/setup-matt-pocock-skills/domain.md +51 -51
  282. package/skills/engineering/setup-matt-pocock-skills/issue-tracker-github.md +22 -22
  283. package/skills/engineering/setup-matt-pocock-skills/issue-tracker-gitlab.md +23 -23
  284. package/skills/engineering/setup-matt-pocock-skills/issue-tracker-local.md +19 -19
  285. package/skills/engineering/setup-matt-pocock-skills/triage-labels.md +15 -15
  286. package/skills/engineering/shopify/README.md +66 -66
  287. package/skills/engineering/shopify/SKILL.md +319 -319
  288. package/skills/engineering/shopify/references/app-development.md +470 -470
  289. package/skills/engineering/shopify/references/extensions.md +493 -493
  290. package/skills/engineering/shopify/references/themes.md +498 -498
  291. package/skills/engineering/shopify/scripts/requirements.txt +19 -19
  292. package/skills/engineering/shopify/scripts/shopify_init.py +423 -423
  293. package/skills/engineering/shopify/scripts/tests/test_shopify_init.py +385 -385
  294. package/skills/engineering/tdd/SKILL.md +109 -109
  295. package/skills/engineering/tdd/deep-modules.md +33 -33
  296. package/skills/engineering/tdd/interface-design.md +31 -31
  297. package/skills/engineering/tdd/mocking.md +59 -59
  298. package/skills/engineering/tdd/refactoring.md +10 -10
  299. package/skills/engineering/tdd/tests.md +61 -61
  300. package/skills/engineering/to-issues/SKILL.md +81 -81
  301. package/skills/engineering/to-prd/SKILL.md +74 -74
  302. package/skills/engineering/triage/AGENT-BRIEF.md +168 -168
  303. package/skills/engineering/triage/OUT-OF-SCOPE.md +101 -101
  304. package/skills/engineering/triage/SKILL.md +103 -103
  305. package/skills/engineering/web-frameworks/SKILL.md +324 -324
  306. package/skills/engineering/web-frameworks/references/nextjs-app-router.md +465 -465
  307. package/skills/engineering/web-frameworks/references/nextjs-data-fetching.md +459 -459
  308. package/skills/engineering/web-frameworks/references/nextjs-optimization.md +511 -511
  309. package/skills/engineering/web-frameworks/references/nextjs-server-components.md +495 -495
  310. package/skills/engineering/web-frameworks/references/remix-icon-integration.md +603 -603
  311. package/skills/engineering/web-frameworks/references/turborepo-caching.md +551 -551
  312. package/skills/engineering/web-frameworks/references/turborepo-pipelines.md +517 -517
  313. package/skills/engineering/web-frameworks/references/turborepo-setup.md +542 -542
  314. package/skills/engineering/web-frameworks/scripts/nextjs_init.py +547 -547
  315. package/skills/engineering/web-frameworks/scripts/requirements.txt +16 -16
  316. package/skills/engineering/web-frameworks/scripts/tests/requirements.txt +3 -3
  317. package/skills/engineering/web-frameworks/scripts/tests/test_nextjs_init.py +319 -319
  318. package/skills/engineering/web-frameworks/scripts/tests/test_turborepo_migrate.py +374 -374
  319. package/skills/engineering/web-frameworks/scripts/turborepo_migrate.py +394 -394
  320. package/skills/engineering/write-a-skill/SKILL.md +117 -117
  321. package/skills/kilo-kit/SKILL.md +346 -346
  322. package/skills/kilo-kit/_template/SKILL.md +185 -185
  323. package/skills/kilo-kit/debugging/root-cause/SKILL.md +360 -360
  324. package/skills/kilo-kit/debugging/systematic/SKILL.md +339 -339
  325. package/skills/kilo-kit/debugging/verification/SKILL.md +424 -424
  326. package/skills/kilo-kit/development/backend/SKILL.md +540 -540
  327. package/skills/kilo-kit/development/security/SKILL.md +529 -529
  328. package/skills/kilo-kit/quality/code-review/SKILL.md +297 -297
  329. package/skills/kilo-kit/quality/testing/SKILL.md +540 -540
  330. package/skills/kilo-kit/references/output-formats.md +204 -204
  331. package/skills/kilo-kit/references/patterns.md +156 -156
  332. package/skills/kilo-kit/references/performance-benchmarks.md +90 -90
  333. package/skills/operations/chrome-devtools/SKILL.md +392 -392
  334. package/skills/operations/chrome-devtools/references/cdp-domains.md +694 -694
  335. package/skills/operations/chrome-devtools/references/performance-guide.md +940 -940
  336. package/skills/operations/chrome-devtools/references/puppeteer-reference.md +953 -953
  337. package/skills/operations/chrome-devtools/scripts/PERSISTENT-BROWSER.md +107 -107
  338. package/skills/operations/chrome-devtools/scripts/README.md +213 -213
  339. package/skills/operations/chrome-devtools/scripts/__tests__/selector.test.js +210 -210
  340. package/skills/operations/chrome-devtools/scripts/click.js +79 -79
  341. package/skills/operations/chrome-devtools/scripts/close-persistent.js +36 -36
  342. package/skills/operations/chrome-devtools/scripts/console.js +75 -75
  343. package/skills/operations/chrome-devtools/scripts/evaluate.js +49 -49
  344. package/skills/operations/chrome-devtools/scripts/fill.js +72 -72
  345. package/skills/operations/chrome-devtools/scripts/install-deps.sh +181 -181
  346. package/skills/operations/chrome-devtools/scripts/install.sh +83 -83
  347. package/skills/operations/chrome-devtools/scripts/launch-persistent.js +71 -71
  348. package/skills/operations/chrome-devtools/scripts/lib/browser.js +144 -144
  349. package/skills/operations/chrome-devtools/scripts/lib/selector.js +178 -178
  350. package/skills/operations/chrome-devtools/scripts/navigate.js +46 -46
  351. package/skills/operations/chrome-devtools/scripts/network.js +102 -102
  352. package/skills/operations/chrome-devtools/scripts/package-lock.json +1206 -1206
  353. package/skills/operations/chrome-devtools/scripts/package.json +15 -15
  354. package/skills/operations/chrome-devtools/scripts/performance.js +145 -145
  355. package/skills/operations/chrome-devtools/scripts/screenshot.js +180 -180
  356. package/skills/operations/chrome-devtools/scripts/snapshot.js +131 -131
  357. package/skills/operations/devops/.env.example +76 -76
  358. package/skills/operations/devops/SKILL.md +285 -285
  359. package/skills/operations/devops/references/browser-rendering.md +305 -305
  360. package/skills/operations/devops/references/cloudflare-d1-kv.md +123 -123
  361. package/skills/operations/devops/references/cloudflare-platform.md +271 -271
  362. package/skills/operations/devops/references/cloudflare-r2-storage.md +280 -280
  363. package/skills/operations/devops/references/cloudflare-workers-advanced.md +312 -312
  364. package/skills/operations/devops/references/cloudflare-workers-apis.md +309 -309
  365. package/skills/operations/devops/references/cloudflare-workers-basics.md +418 -418
  366. package/skills/operations/devops/references/docker-basics.md +297 -297
  367. package/skills/operations/devops/references/docker-compose.md +292 -292
  368. package/skills/operations/devops/references/gcloud-platform.md +297 -297
  369. package/skills/operations/devops/references/gcloud-services.md +304 -304
  370. package/skills/operations/devops/scripts/cloudflare_deploy.py +269 -269
  371. package/skills/operations/devops/scripts/docker_optimize.py +320 -320
  372. package/skills/operations/devops/scripts/requirements.txt +20 -20
  373. package/skills/operations/devops/scripts/tests/requirements.txt +3 -3
  374. package/skills/operations/devops/scripts/tests/test_cloudflare_deploy.py +285 -285
  375. package/skills/operations/devops/scripts/tests/test_docker_optimize.py +436 -436
  376. package/skills/operations/mcp-builder/LICENSE.txt +201 -201
  377. package/skills/operations/mcp-builder/SKILL.md +328 -328
  378. package/skills/operations/mcp-builder/reference/evaluation.md +601 -601
  379. package/skills/operations/mcp-builder/reference/mcp_best_practices.md +915 -915
  380. package/skills/operations/mcp-builder/reference/node_mcp_server.md +915 -915
  381. package/skills/operations/mcp-builder/reference/python_mcp_server.md +751 -751
  382. package/skills/operations/mcp-builder/scripts/connections.py +151 -151
  383. package/skills/operations/mcp-builder/scripts/evaluation.py +373 -373
  384. package/skills/operations/mcp-builder/scripts/example_evaluation.xml +22 -22
  385. package/skills/operations/mcp-builder/scripts/requirements.txt +2 -2
  386. package/skills/operations/mcp-management/README.md +219 -219
  387. package/skills/operations/mcp-management/SKILL.md +175 -175
  388. package/skills/operations/mcp-management/assets/tools.json +3043 -3043
  389. package/skills/operations/mcp-management/references/configuration.md +114 -114
  390. package/skills/operations/mcp-management/references/gemini-cli-integration.md +201 -201
  391. package/skills/operations/mcp-management/references/mcp-protocol.md +116 -116
  392. package/skills/operations/mcp-management/scripts/.env.example +10 -10
  393. package/skills/operations/mcp-management/scripts/cli.ts +155 -155
  394. package/skills/operations/mcp-management/scripts/dist/analyze-tools.js +70 -70
  395. package/skills/operations/mcp-management/scripts/dist/cli.js +131 -131
  396. package/skills/operations/mcp-management/scripts/dist/mcp-client.js +115 -115
  397. package/skills/operations/mcp-management/scripts/mcp-client.ts +163 -163
  398. package/skills/operations/mcp-management/scripts/package.json +18 -18
  399. package/skills/operations/mcp-management/scripts/tsconfig.json +15 -15
  400. package/skills/problem-solving/collision-zone-thinking/SKILL.md +62 -62
  401. package/skills/problem-solving/defense-in-depth/SKILL.md +130 -130
  402. package/skills/problem-solving/inversion-exercise/SKILL.md +58 -58
  403. package/skills/problem-solving/meta-pattern-recognition/SKILL.md +54 -54
  404. package/skills/problem-solving/root-cause-tracing/SKILL.md +177 -177
  405. package/skills/problem-solving/root-cause-tracing/find-polluter.sh +63 -63
  406. package/skills/problem-solving/scale-game/SKILL.md +63 -63
  407. package/skills/problem-solving/sequential-thinking/README.md +118 -118
  408. package/skills/problem-solving/sequential-thinking/SKILL.md +93 -93
  409. package/skills/problem-solving/sequential-thinking/references/advanced.md +122 -122
  410. package/skills/problem-solving/sequential-thinking/references/examples.md +274 -274
  411. package/skills/problem-solving/simplification-cascades/SKILL.md +76 -76
  412. package/skills/problem-solving/when-stuck/SKILL.md +88 -88
  413. package/skills/productivity/caveman/SKILL.md +49 -49
  414. package/skills/productivity/grill-me/SKILL.md +10 -10
  415. package/skills/productivity/grill-with-docs/ADR-FORMAT.md +47 -47
  416. package/skills/productivity/grill-with-docs/CONTEXT-FORMAT.md +77 -77
  417. package/skills/productivity/grill-with-docs/SKILL.md +88 -88
  418. package/skills/productivity/writing-skills/graphviz-conventions.dot +171 -171
  419. package/skills/productivity/zoom-out/SKILL.md +7 -7
  420. package/skills/writing-docs/doc/LICENSE.txt +201 -201
  421. package/skills/writing-docs/doc/SKILL.md +80 -80
  422. package/skills/writing-docs/doc/agents/openai.yaml +6 -6
  423. package/skills/writing-docs/doc/assets/doc-small.svg +3 -3
  424. package/skills/writing-docs/doc/scripts/render_docx.py +296 -296
  425. package/skills/writing-docs/docx/LICENSE.txt +30 -30
  426. package/skills/writing-docs/docx/SKILL.md +196 -196
  427. package/skills/writing-docs/docx/docx-js.md +349 -349
  428. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  429. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -146
  430. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  431. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -11
  432. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  433. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -23
  434. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -185
  435. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  436. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  437. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -28
  438. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -144
  439. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  440. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -25
  441. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -18
  442. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -59
  443. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -56
  444. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -195
  445. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  446. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -25
  447. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  448. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  449. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -509
  450. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -12
  451. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -108
  452. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -96
  453. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -3646
  454. package/skills/writing-docs/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  455. package/skills/writing-docs/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  456. package/skills/writing-docs/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  457. package/skills/writing-docs/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  458. package/skills/writing-docs/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  459. package/skills/writing-docs/docx/ooxml/schemas/mce/mc.xsd +75 -75
  460. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -560
  461. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -67
  462. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -14
  463. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -20
  464. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -13
  465. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -4
  466. package/skills/writing-docs/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -8
  467. package/skills/writing-docs/docx/ooxml/scripts/pack.py +159 -159
  468. package/skills/writing-docs/docx/ooxml/scripts/unpack.py +29 -29
  469. package/skills/writing-docs/docx/ooxml/scripts/validate.py +69 -69
  470. package/skills/writing-docs/docx/ooxml/scripts/validation/__init__.py +15 -15
  471. package/skills/writing-docs/docx/ooxml/scripts/validation/base.py +951 -951
  472. package/skills/writing-docs/docx/ooxml/scripts/validation/docx.py +274 -274
  473. package/skills/writing-docs/docx/ooxml/scripts/validation/pptx.py +315 -315
  474. package/skills/writing-docs/docx/ooxml/scripts/validation/redlining.py +279 -279
  475. package/skills/writing-docs/docx/ooxml.md +609 -609
  476. package/skills/writing-docs/docx/scripts/__init__.py +1 -1
  477. package/skills/writing-docs/docx/scripts/document.py +1276 -1276
  478. package/skills/writing-docs/docx/scripts/templates/comments.xml +2 -2
  479. package/skills/writing-docs/docx/scripts/templates/commentsExtended.xml +2 -2
  480. package/skills/writing-docs/docx/scripts/templates/commentsExtensible.xml +2 -2
  481. package/skills/writing-docs/docx/scripts/templates/commentsIds.xml +2 -2
  482. package/skills/writing-docs/docx/scripts/templates/people.xml +2 -2
  483. package/skills/writing-docs/docx/scripts/utilities.py +374 -374
  484. package/skills/writing-docs/mermaidjs-v11/SKILL.md +115 -115
  485. package/skills/writing-docs/mermaidjs-v11/references/cli-usage.md +228 -228
  486. package/skills/writing-docs/mermaidjs-v11/references/configuration.md +232 -232
  487. package/skills/writing-docs/mermaidjs-v11/references/diagram-types.md +315 -315
  488. package/skills/writing-docs/mermaidjs-v11/references/examples.md +344 -344
  489. package/skills/writing-docs/mermaidjs-v11/references/integration.md +310 -310
  490. package/skills/writing-docs/pdf/LICENSE.txt +30 -30
  491. package/skills/writing-docs/pdf/SKILL.md +294 -294
  492. package/skills/writing-docs/pdf/forms.md +205 -205
  493. package/skills/writing-docs/pdf/reference.md +611 -611
  494. package/skills/writing-docs/pdf/scripts/check_bounding_boxes.py +70 -70
  495. package/skills/writing-docs/pdf/scripts/check_bounding_boxes_test.py +226 -226
  496. package/skills/writing-docs/pdf/scripts/check_fillable_fields.py +12 -12
  497. package/skills/writing-docs/pdf/scripts/convert_pdf_to_images.py +35 -35
  498. package/skills/writing-docs/pdf/scripts/create_validation_image.py +41 -41
  499. package/skills/writing-docs/pdf/scripts/extract_form_field_info.py +152 -152
  500. package/skills/writing-docs/pdf/scripts/fill_fillable_fields.py +114 -114
  501. package/skills/writing-docs/pdf/scripts/fill_pdf_form_with_annotations.py +107 -107
  502. package/skills/writing-docs/pptx/LICENSE.txt +30 -30
  503. package/skills/writing-docs/pptx/SKILL.md +483 -483
  504. package/skills/writing-docs/pptx/html2pptx.md +624 -624
  505. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
  506. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -146
  507. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
  508. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -11
  509. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
  510. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -23
  511. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -185
  512. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
  513. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
  514. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -28
  515. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -144
  516. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
  517. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -25
  518. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -18
  519. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -59
  520. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -56
  521. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -195
  522. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
  523. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -25
  524. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
  525. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
  526. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -509
  527. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -12
  528. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -108
  529. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -96
  530. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -3646
  531. package/skills/writing-docs/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
  532. package/skills/writing-docs/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
  533. package/skills/writing-docs/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
  534. package/skills/writing-docs/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
  535. package/skills/writing-docs/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
  536. package/skills/writing-docs/pptx/ooxml/schemas/mce/mc.xsd +75 -75
  537. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -560
  538. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -67
  539. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -14
  540. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -20
  541. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -13
  542. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -4
  543. package/skills/writing-docs/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -8
  544. package/skills/writing-docs/pptx/ooxml/scripts/pack.py +159 -159
  545. package/skills/writing-docs/pptx/ooxml/scripts/unpack.py +29 -29
  546. package/skills/writing-docs/pptx/ooxml/scripts/validate.py +69 -69
  547. package/skills/writing-docs/pptx/ooxml/scripts/validation/__init__.py +15 -15
  548. package/skills/writing-docs/pptx/ooxml/scripts/validation/base.py +951 -951
  549. package/skills/writing-docs/pptx/ooxml/scripts/validation/docx.py +274 -274
  550. package/skills/writing-docs/pptx/ooxml/scripts/validation/pptx.py +315 -315
  551. package/skills/writing-docs/pptx/ooxml/scripts/validation/redlining.py +279 -279
  552. package/skills/writing-docs/pptx/ooxml.md +426 -426
  553. package/skills/writing-docs/pptx/scripts/html2pptx.js +978 -978
  554. package/skills/writing-docs/pptx/scripts/inventory.py +1020 -1020
  555. package/skills/writing-docs/pptx/scripts/rearrange.py +231 -231
  556. package/skills/writing-docs/pptx/scripts/replace.py +385 -385
  557. package/skills/writing-docs/pptx/scripts/thumbnail.py +450 -450
  558. package/skills/writing-docs/slides/LICENSE.txt +201 -201
  559. package/skills/writing-docs/slides/SKILL.md +71 -71
  560. package/skills/writing-docs/slides/agents/openai.yaml +6 -6
  561. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/code.js +104 -104
  562. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/image.js +333 -333
  563. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/index.js +33 -33
  564. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/latex.js +51 -51
  565. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/layout.js +643 -643
  566. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/layout_builders.js +358 -358
  567. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/svg.js +36 -36
  568. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/text.js +789 -789
  569. package/skills/writing-docs/slides/assets/pptxgenjs_helpers/util.js +24 -24
  570. package/skills/writing-docs/slides/assets/slides-small.svg +3 -3
  571. package/skills/writing-docs/slides/references/pptxgenjs-helpers.md +61 -61
  572. package/skills/writing-docs/slides/scripts/create_montage.py +300 -300
  573. package/skills/writing-docs/slides/scripts/detect_font.py +873 -873
  574. package/skills/writing-docs/slides/scripts/ensure_raster_image.py +202 -202
  575. package/skills/writing-docs/slides/scripts/render_slides.py +273 -273
  576. package/skills/writing-docs/slides/scripts/slides_test.py +201 -201
  577. package/skills/writing-docs/template-skill/SKILL.md +26 -26
  578. package/skills/writing-docs/xlsx/LICENSE.txt +30 -30
  579. package/skills/writing-docs/xlsx/SKILL.md +288 -288
  580. package/skills/writing-docs/xlsx/recalc.py +177 -177
  581. package/src/core/KILO_MASTER.md +448 -448
  582. package/src/tools/validate-skill.js +421 -421
@@ -1,951 +1,951 @@
1
- """
2
- Base validator with common validation logic for document files.
3
- """
4
-
5
- import re
6
- from pathlib import Path
7
-
8
- import lxml.etree
9
-
10
-
11
- class BaseSchemaValidator:
12
- """Base validator with common validation logic for document files."""
13
-
14
- # Elements whose 'id' attributes must be unique within their file
15
- # Format: element_name -> (attribute_name, scope)
16
- # scope can be 'file' (unique within file) or 'global' (unique across all files)
17
- UNIQUE_ID_REQUIREMENTS = {
18
- # Word elements
19
- "comment": ("id", "file"), # Comment IDs in comments.xml
20
- "commentrangestart": ("id", "file"), # Must match comment IDs
21
- "commentrangeend": ("id", "file"), # Must match comment IDs
22
- "bookmarkstart": ("id", "file"), # Bookmark start IDs
23
- "bookmarkend": ("id", "file"), # Bookmark end IDs
24
- # Note: ins and del (track changes) can share IDs when part of same revision
25
- # PowerPoint elements
26
- "sldid": ("id", "file"), # Slide IDs in presentation.xml
27
- "sldmasterid": ("id", "global"), # Slide master IDs must be globally unique
28
- "sldlayoutid": ("id", "global"), # Slide layout IDs must be globally unique
29
- "cm": ("authorid", "file"), # Comment author IDs
30
- # Excel elements
31
- "sheet": ("sheetid", "file"), # Sheet IDs in workbook.xml
32
- "definedname": ("id", "file"), # Named range IDs
33
- # Drawing/Shape elements (all formats)
34
- "cxnsp": ("id", "file"), # Connection shape IDs
35
- "sp": ("id", "file"), # Shape IDs
36
- "pic": ("id", "file"), # Picture IDs
37
- "grpsp": ("id", "file"), # Group shape IDs
38
- }
39
-
40
- # Mapping of element names to expected relationship types
41
- # Subclasses should override this with format-specific mappings
42
- ELEMENT_RELATIONSHIP_TYPES = {}
43
-
44
- # Unified schema mappings for all Office document types
45
- SCHEMA_MAPPINGS = {
46
- # Document type specific schemas
47
- "word": "ISO-IEC29500-4_2016/wml.xsd", # Word documents
48
- "ppt": "ISO-IEC29500-4_2016/pml.xsd", # PowerPoint presentations
49
- "xl": "ISO-IEC29500-4_2016/sml.xsd", # Excel spreadsheets
50
- # Common file types
51
- "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
52
- "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
53
- "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
54
- "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
55
- ".rels": "ecma/fouth-edition/opc-relationships.xsd",
56
- # Word-specific files
57
- "people.xml": "microsoft/wml-2012.xsd",
58
- "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
59
- "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
60
- "commentsExtended.xml": "microsoft/wml-2012.xsd",
61
- # Chart files (common across document types)
62
- "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
63
- # Theme files (common across document types)
64
- "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
65
- # Drawing and media files
66
- "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
67
- }
68
-
69
- # Unified namespace constants
70
- MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
71
- XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
72
-
73
- # Common OOXML namespaces used across validators
74
- PACKAGE_RELATIONSHIPS_NAMESPACE = (
75
- "http://schemas.openxmlformats.org/package/2006/relationships"
76
- )
77
- OFFICE_RELATIONSHIPS_NAMESPACE = (
78
- "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
79
- )
80
- CONTENT_TYPES_NAMESPACE = (
81
- "http://schemas.openxmlformats.org/package/2006/content-types"
82
- )
83
-
84
- # Folders where we should clean ignorable namespaces
85
- MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}
86
-
87
- # All allowed OOXML namespaces (superset of all document types)
88
- OOXML_NAMESPACES = {
89
- "http://schemas.openxmlformats.org/officeDocument/2006/math",
90
- "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
91
- "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
92
- "http://schemas.openxmlformats.org/drawingml/2006/main",
93
- "http://schemas.openxmlformats.org/drawingml/2006/chart",
94
- "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
95
- "http://schemas.openxmlformats.org/drawingml/2006/diagram",
96
- "http://schemas.openxmlformats.org/drawingml/2006/picture",
97
- "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
98
- "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
99
- "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
100
- "http://schemas.openxmlformats.org/presentationml/2006/main",
101
- "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
102
- "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
103
- "http://www.w3.org/XML/1998/namespace",
104
- }
105
-
106
- def __init__(self, unpacked_dir, original_file, verbose=False):
107
- self.unpacked_dir = Path(unpacked_dir).resolve()
108
- self.original_file = Path(original_file)
109
- self.verbose = verbose
110
-
111
- # Set schemas directory
112
- self.schemas_dir = Path(__file__).parent.parent.parent / "schemas"
113
-
114
- # Get all XML and .rels files
115
- patterns = ["*.xml", "*.rels"]
116
- self.xml_files = [
117
- f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)
118
- ]
119
-
120
- if not self.xml_files:
121
- print(f"Warning: No XML files found in {self.unpacked_dir}")
122
-
123
- def validate(self):
124
- """Run all validation checks and return True if all pass."""
125
- raise NotImplementedError("Subclasses must implement the validate method")
126
-
127
- def validate_xml(self):
128
- """Validate that all XML files are well-formed."""
129
- errors = []
130
-
131
- for xml_file in self.xml_files:
132
- try:
133
- # Try to parse the XML file
134
- lxml.etree.parse(str(xml_file))
135
- except lxml.etree.XMLSyntaxError as e:
136
- errors.append(
137
- f" {xml_file.relative_to(self.unpacked_dir)}: "
138
- f"Line {e.lineno}: {e.msg}"
139
- )
140
- except Exception as e:
141
- errors.append(
142
- f" {xml_file.relative_to(self.unpacked_dir)}: "
143
- f"Unexpected error: {str(e)}"
144
- )
145
-
146
- if errors:
147
- print(f"FAILED - Found {len(errors)} XML violations:")
148
- for error in errors:
149
- print(error)
150
- return False
151
- else:
152
- if self.verbose:
153
- print("PASSED - All XML files are well-formed")
154
- return True
155
-
156
- def validate_namespaces(self):
157
- """Validate that namespace prefixes in Ignorable attributes are declared."""
158
- errors = []
159
-
160
- for xml_file in self.xml_files:
161
- try:
162
- root = lxml.etree.parse(str(xml_file)).getroot()
163
- declared = set(root.nsmap.keys()) - {None} # Exclude default namespace
164
-
165
- for attr_val in [
166
- v for k, v in root.attrib.items() if k.endswith("Ignorable")
167
- ]:
168
- undeclared = set(attr_val.split()) - declared
169
- errors.extend(
170
- f" {xml_file.relative_to(self.unpacked_dir)}: "
171
- f"Namespace '{ns}' in Ignorable but not declared"
172
- for ns in undeclared
173
- )
174
- except lxml.etree.XMLSyntaxError:
175
- continue
176
-
177
- if errors:
178
- print(f"FAILED - {len(errors)} namespace issues:")
179
- for error in errors:
180
- print(error)
181
- return False
182
- if self.verbose:
183
- print("PASSED - All namespace prefixes properly declared")
184
- return True
185
-
186
- def validate_unique_ids(self):
187
- """Validate that specific IDs are unique according to OOXML requirements."""
188
- errors = []
189
- global_ids = {} # Track globally unique IDs across all files
190
-
191
- for xml_file in self.xml_files:
192
- try:
193
- root = lxml.etree.parse(str(xml_file)).getroot()
194
- file_ids = {} # Track IDs that must be unique within this file
195
-
196
- # Remove all mc:AlternateContent elements from the tree
197
- mc_elements = root.xpath(
198
- ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
199
- )
200
- for elem in mc_elements:
201
- elem.getparent().remove(elem)
202
-
203
- # Now check IDs in the cleaned tree
204
- for elem in root.iter():
205
- # Get the element name without namespace
206
- tag = (
207
- elem.tag.split("}")[-1].lower()
208
- if "}" in elem.tag
209
- else elem.tag.lower()
210
- )
211
-
212
- # Check if this element type has ID uniqueness requirements
213
- if tag in self.UNIQUE_ID_REQUIREMENTS:
214
- attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]
215
-
216
- # Look for the specified attribute
217
- id_value = None
218
- for attr, value in elem.attrib.items():
219
- attr_local = (
220
- attr.split("}")[-1].lower()
221
- if "}" in attr
222
- else attr.lower()
223
- )
224
- if attr_local == attr_name:
225
- id_value = value
226
- break
227
-
228
- if id_value is not None:
229
- if scope == "global":
230
- # Check global uniqueness
231
- if id_value in global_ids:
232
- prev_file, prev_line, prev_tag = global_ids[
233
- id_value
234
- ]
235
- errors.append(
236
- f" {xml_file.relative_to(self.unpacked_dir)}: "
237
- f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
238
- f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
239
- )
240
- else:
241
- global_ids[id_value] = (
242
- xml_file.relative_to(self.unpacked_dir),
243
- elem.sourceline,
244
- tag,
245
- )
246
- elif scope == "file":
247
- # Check file-level uniqueness
248
- key = (tag, attr_name)
249
- if key not in file_ids:
250
- file_ids[key] = {}
251
-
252
- if id_value in file_ids[key]:
253
- prev_line = file_ids[key][id_value]
254
- errors.append(
255
- f" {xml_file.relative_to(self.unpacked_dir)}: "
256
- f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
257
- f"(first occurrence at line {prev_line})"
258
- )
259
- else:
260
- file_ids[key][id_value] = elem.sourceline
261
-
262
- except (lxml.etree.XMLSyntaxError, Exception) as e:
263
- errors.append(
264
- f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
265
- )
266
-
267
- if errors:
268
- print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
269
- for error in errors:
270
- print(error)
271
- return False
272
- else:
273
- if self.verbose:
274
- print("PASSED - All required IDs are unique")
275
- return True
276
-
277
- def validate_file_references(self):
278
- """
279
- Validate that all .rels files properly reference files and that all files are referenced.
280
- """
281
- errors = []
282
-
283
- # Find all .rels files
284
- rels_files = list(self.unpacked_dir.rglob("*.rels"))
285
-
286
- if not rels_files:
287
- if self.verbose:
288
- print("PASSED - No .rels files found")
289
- return True
290
-
291
- # Get all files in the unpacked directory (excluding reference files)
292
- all_files = []
293
- for file_path in self.unpacked_dir.rglob("*"):
294
- if (
295
- file_path.is_file()
296
- and file_path.name != "[Content_Types].xml"
297
- and not file_path.name.endswith(".rels")
298
- ): # This file is not referenced by .rels
299
- all_files.append(file_path.resolve())
300
-
301
- # Track all files that are referenced by any .rels file
302
- all_referenced_files = set()
303
-
304
- if self.verbose:
305
- print(
306
- f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
307
- )
308
-
309
- # Check each .rels file
310
- for rels_file in rels_files:
311
- try:
312
- # Parse relationships file
313
- rels_root = lxml.etree.parse(str(rels_file)).getroot()
314
-
315
- # Get the directory where this .rels file is located
316
- rels_dir = rels_file.parent
317
-
318
- # Find all relationships and their targets
319
- referenced_files = set()
320
- broken_refs = []
321
-
322
- for rel in rels_root.findall(
323
- ".//ns:Relationship",
324
- namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
325
- ):
326
- target = rel.get("Target")
327
- if target and not target.startswith(
328
- ("http", "mailto:")
329
- ): # Skip external URLs
330
- # Resolve the target path relative to the .rels file location
331
- if rels_file.name == ".rels":
332
- # Root .rels file - targets are relative to unpacked_dir
333
- target_path = self.unpacked_dir / target
334
- else:
335
- # Other .rels files - targets are relative to their parent's parent
336
- # e.g., word/_rels/document.xml.rels -> targets relative to word/
337
- base_dir = rels_dir.parent
338
- target_path = base_dir / target
339
-
340
- # Normalize the path and check if it exists
341
- try:
342
- target_path = target_path.resolve()
343
- if target_path.exists() and target_path.is_file():
344
- referenced_files.add(target_path)
345
- all_referenced_files.add(target_path)
346
- else:
347
- broken_refs.append((target, rel.sourceline))
348
- except (OSError, ValueError):
349
- broken_refs.append((target, rel.sourceline))
350
-
351
- # Report broken references
352
- if broken_refs:
353
- rel_path = rels_file.relative_to(self.unpacked_dir)
354
- for broken_ref, line_num in broken_refs:
355
- errors.append(
356
- f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
357
- )
358
-
359
- except Exception as e:
360
- rel_path = rels_file.relative_to(self.unpacked_dir)
361
- errors.append(f" Error parsing {rel_path}: {e}")
362
-
363
- # Check for unreferenced files (files that exist but are not referenced anywhere)
364
- unreferenced_files = set(all_files) - all_referenced_files
365
-
366
- if unreferenced_files:
367
- for unref_file in sorted(unreferenced_files):
368
- unref_rel_path = unref_file.relative_to(self.unpacked_dir)
369
- errors.append(f" Unreferenced file: {unref_rel_path}")
370
-
371
- if errors:
372
- print(f"FAILED - Found {len(errors)} relationship validation errors:")
373
- for error in errors:
374
- print(error)
375
- print(
376
- "CRITICAL: These errors will cause the document to appear corrupt. "
377
- + "Broken references MUST be fixed, "
378
- + "and unreferenced files MUST be referenced or removed."
379
- )
380
- return False
381
- else:
382
- if self.verbose:
383
- print(
384
- "PASSED - All references are valid and all files are properly referenced"
385
- )
386
- return True
387
-
388
- def validate_all_relationship_ids(self):
389
- """
390
- Validate that all r:id attributes in XML files reference existing IDs
391
- in their corresponding .rels files, and optionally validate relationship types.
392
- """
393
- import lxml.etree
394
-
395
- errors = []
396
-
397
- # Process each XML file that might contain r:id references
398
- for xml_file in self.xml_files:
399
- # Skip .rels files themselves
400
- if xml_file.suffix == ".rels":
401
- continue
402
-
403
- # Determine the corresponding .rels file
404
- # For dir/file.xml, it's dir/_rels/file.xml.rels
405
- rels_dir = xml_file.parent / "_rels"
406
- rels_file = rels_dir / f"{xml_file.name}.rels"
407
-
408
- # Skip if there's no corresponding .rels file (that's okay)
409
- if not rels_file.exists():
410
- continue
411
-
412
- try:
413
- # Parse the .rels file to get valid relationship IDs and their types
414
- rels_root = lxml.etree.parse(str(rels_file)).getroot()
415
- rid_to_type = {}
416
-
417
- for rel in rels_root.findall(
418
- f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
419
- ):
420
- rid = rel.get("Id")
421
- rel_type = rel.get("Type", "")
422
- if rid:
423
- # Check for duplicate rIds
424
- if rid in rid_to_type:
425
- rels_rel_path = rels_file.relative_to(self.unpacked_dir)
426
- errors.append(
427
- f" {rels_rel_path}: Line {rel.sourceline}: "
428
- f"Duplicate relationship ID '{rid}' (IDs must be unique)"
429
- )
430
- # Extract just the type name from the full URL
431
- type_name = (
432
- rel_type.split("/")[-1] if "/" in rel_type else rel_type
433
- )
434
- rid_to_type[rid] = type_name
435
-
436
- # Parse the XML file to find all r:id references
437
- xml_root = lxml.etree.parse(str(xml_file)).getroot()
438
-
439
- # Find all elements with r:id attributes
440
- for elem in xml_root.iter():
441
- # Check for r:id attribute (relationship ID)
442
- rid_attr = elem.get(f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id")
443
- if rid_attr:
444
- xml_rel_path = xml_file.relative_to(self.unpacked_dir)
445
- elem_name = (
446
- elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
447
- )
448
-
449
- # Check if the ID exists
450
- if rid_attr not in rid_to_type:
451
- errors.append(
452
- f" {xml_rel_path}: Line {elem.sourceline}: "
453
- f"<{elem_name}> references non-existent relationship '{rid_attr}' "
454
- f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' if len(rid_to_type) > 5 else ''})"
455
- )
456
- # Check if we have type expectations for this element
457
- elif self.ELEMENT_RELATIONSHIP_TYPES:
458
- expected_type = self._get_expected_relationship_type(
459
- elem_name
460
- )
461
- if expected_type:
462
- actual_type = rid_to_type[rid_attr]
463
- # Check if the actual type matches or contains the expected type
464
- if expected_type not in actual_type.lower():
465
- errors.append(
466
- f" {xml_rel_path}: Line {elem.sourceline}: "
467
- f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' "
468
- f"but should point to a '{expected_type}' relationship"
469
- )
470
-
471
- except Exception as e:
472
- xml_rel_path = xml_file.relative_to(self.unpacked_dir)
473
- errors.append(f" Error processing {xml_rel_path}: {e}")
474
-
475
- if errors:
476
- print(f"FAILED - Found {len(errors)} relationship ID reference errors:")
477
- for error in errors:
478
- print(error)
479
- print("\nThese ID mismatches will cause the document to appear corrupt!")
480
- return False
481
- else:
482
- if self.verbose:
483
- print("PASSED - All relationship ID references are valid")
484
- return True
485
-
486
- def _get_expected_relationship_type(self, element_name):
487
- """
488
- Get the expected relationship type for an element.
489
- First checks the explicit mapping, then tries pattern detection.
490
- """
491
- # Normalize element name to lowercase
492
- elem_lower = element_name.lower()
493
-
494
- # Check explicit mapping first
495
- if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES:
496
- return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower]
497
-
498
- # Try pattern detection for common patterns
499
- # Pattern 1: Elements ending in "Id" often expect a relationship of the prefix type
500
- if elem_lower.endswith("id") and len(elem_lower) > 2:
501
- # e.g., "sldId" -> "sld", "sldMasterId" -> "sldMaster"
502
- prefix = elem_lower[:-2] # Remove "id"
503
- # Check if this might be a compound like "sldMasterId"
504
- if prefix.endswith("master"):
505
- return prefix.lower()
506
- elif prefix.endswith("layout"):
507
- return prefix.lower()
508
- else:
509
- # Simple case like "sldId" -> "slide"
510
- # Common transformations
511
- if prefix == "sld":
512
- return "slide"
513
- return prefix.lower()
514
-
515
- # Pattern 2: Elements ending in "Reference" expect a relationship of the prefix type
516
- if elem_lower.endswith("reference") and len(elem_lower) > 9:
517
- prefix = elem_lower[:-9] # Remove "reference"
518
- return prefix.lower()
519
-
520
- return None
521
-
522
def validate_content_types(self):
    """Check that package content is declared in [Content_Types].xml.

    XML parts whose root element is one of the main document roots need an
    ``Override`` entry for their path; files with a known media extension
    need a ``Default`` entry for that extension. Findings are printed.

    Returns:
        bool: True when nothing is missing, False otherwise (including when
        [Content_Types].xml is absent or cannot be processed).
    """
    manifest = self.unpacked_dir / "[Content_Types].xml"
    if not manifest.exists():
        print("FAILED - [Content_Types].xml file not found")
        return False

    errors = []

    # Root elements that require an Override declaration
    override_roots = {
        "sld",
        "sldLayout",
        "sldMaster",
        "presentation",  # PowerPoint
        "document",  # Word
        "workbook",
        "worksheet",  # Excel
        "theme",  # Common
    }

    # Media extensions that require a Default declaration, with the
    # content type suggested in the error message
    known_media = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "bmp": "image/bmp",
        "tiff": "image/tiff",
        "wmf": "image/x-wmf",
        "emf": "image/x-emf",
    }

    # Path fragments marking package metadata rather than content
    skip_markers = [".rels", "[Content_Types]", "docProps/", "_rels/"]

    try:
        manifest_root = lxml.etree.parse(str(manifest)).getroot()
        ns = self.CONTENT_TYPES_NAMESPACE

        # Override declarations (specific files), stored without leading "/"
        declared_parts = {
            node.get("PartName").lstrip("/")
            for node in manifest_root.findall(f".//{{{ns}}}Override")
            if node.get("PartName") is not None
        }

        # Default declarations (by extension), compared case-insensitively
        declared_extensions = {
            node.get("Extension").lower()
            for node in manifest_root.findall(f".//{{{ns}}}Default")
            if node.get("Extension") is not None
        }

        all_files = [f for f in self.unpacked_dir.rglob("*") if f.is_file()]

        # Pass 1: XML parts that need an Override entry
        for xml_file in self.xml_files:
            path_str = str(xml_file.relative_to(self.unpacked_dir)).replace(
                "\\", "/"
            )
            if any(marker in path_str for marker in skip_markers):
                continue

            try:
                root_tag = lxml.etree.parse(str(xml_file)).getroot().tag
                root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag

                if root_name in override_roots and path_str not in declared_parts:
                    errors.append(
                        f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml"
                    )
            except Exception:
                continue  # Skip unparseable files

        # Pass 2: non-XML files that need a Default entry for their extension
        for file_path in all_files:
            if file_path.suffix.lower() in {".xml", ".rels"}:
                continue
            if file_path.name == "[Content_Types].xml":
                continue
            if "_rels" in file_path.parts or "docProps" in file_path.parts:
                continue

            extension = file_path.suffix.lstrip(".").lower()
            if extension in known_media and extension not in declared_extensions:
                relative_path = file_path.relative_to(self.unpacked_dir)
                suggestion = f'<Default Extension="{extension}" ContentType="{known_media[extension]}"/>'
                errors.append(
                    f" {relative_path}: File with extension '{extension}' not declared in [Content_Types].xml - should add: {suggestion}"
                )

    except Exception as e:
        errors.append(f" Error parsing [Content_Types].xml: {e}")

    if not errors:
        if self.verbose:
            print(
                "PASSED - All content files are properly declared in [Content_Types].xml"
            )
        return True

    print(f"FAILED - Found {len(errors)} content type declaration errors:")
    for error in errors:
        print(error)
    return False
640
-
641
def validate_file_against_xsd(self, xml_file, verbose=False):
    """Validate one XML file against its XSD, ignoring pre-existing errors.

    Errors that the same file already produces in the original document are
    subtracted, so only errors introduced since then count as failures.

    Args:
        xml_file: Path to the XML file to validate.
        verbose: Print a PASSED/FAILED line (and up to three errors).

    Returns:
        tuple: ``(is_valid, new_errors)`` where ``is_valid`` is True, False,
        or None when the file was skipped, and ``new_errors`` is a set.
    """
    # Resolve both sides so symlinked locations compare equal
    target = Path(xml_file).resolve()
    base_dir = self.unpacked_dir.resolve()

    outcome, found = self._validate_single_file_xsd(target, base_dir)
    if outcome is None:
        return None, set()  # Skipped
    if outcome:
        return True, set()  # Valid, no errors

    # Only errors absent from the original file are reported
    baseline = self._get_original_file_errors(target)
    assert found is not None
    introduced = found - baseline

    if not introduced:
        # Every error already existed in the original
        if verbose:
            print(f"PASSED - No new errors (original had {len(found)} errors)")
        return True, set()

    if verbose:
        relative_path = target.relative_to(base_dir)
        print(f"FAILED - {relative_path}: {len(introduced)} new error(s)")
        for message in list(introduced)[:3]:
            shown = message[:250] + "..." if len(message) > 250 else message
            print(f" - {shown}")
    return False, introduced
687
-
688
def validate_against_xsd(self):
    """Validate every XML file against its XSD and report only new errors.

    Each file is checked with ``validate_file_against_xsd``. Files without a
    schema are counted as skipped; files whose errors all pre-date the edit
    count as valid. With ``self.verbose`` set, a summary is printed first.

    Returns:
        bool: True when no file introduced new validation errors.
    """
    report_lines = []
    # Counted explicitly: deriving this from the indentation of the report
    # lines breaks whenever the message formatting changes.
    files_with_new_errors = 0
    original_error_count = 0
    valid_count = 0
    skipped_count = 0

    for xml_file in self.xml_files:
        relative_path = str(xml_file.relative_to(self.unpacked_dir))
        is_valid, new_file_errors = self.validate_file_against_xsd(
            xml_file, verbose=False
        )

        if is_valid is None:
            skipped_count += 1
            continue
        if is_valid:
            # A valid result carrying errors means they all existed in the
            # original. validate_file_against_xsd currently returns an empty
            # set with True, so this counter only moves if that changes.
            if new_file_errors:
                original_error_count += 1
            valid_count += 1
            continue

        # Has new errors
        files_with_new_errors += 1
        report_lines.append(
            f" {relative_path}: {len(new_file_errors)} new error(s)"
        )
        # Sorted so the three errors shown are stable between runs
        for error in sorted(new_file_errors)[:3]:
            report_lines.append(
                f" - {error[:250]}..." if len(error) > 250 else f" - {error}"
            )

    # Print summary
    if self.verbose:
        print(f"Validated {len(self.xml_files)} files:")
        print(f" - Valid: {valid_count}")
        print(f" - Skipped (no schema): {skipped_count}")
        if original_error_count:
            print(f" - With original errors (ignored): {original_error_count}")
        print(f" - With NEW errors: {files_with_new_errors}")

    if report_lines:
        print("\nFAILED - Found NEW validation errors:")
        for line in report_lines:
            print(line)
        return False

    if self.verbose:
        print("\nPASSED - No new XSD validation errors introduced")
    return True
740
-
741
- def _get_schema_path(self, xml_file):
742
- """Determine the appropriate schema path for an XML file."""
743
- # Check exact filename match
744
- if xml_file.name in self.SCHEMA_MAPPINGS:
745
- return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name]
746
-
747
- # Check .rels files
748
- if xml_file.suffix == ".rels":
749
- return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"]
750
-
751
- # Check chart files
752
- if "charts/" in str(xml_file) and xml_file.name.startswith("chart"):
753
- return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"]
754
-
755
- # Check theme files
756
- if "theme/" in str(xml_file) and xml_file.name.startswith("theme"):
757
- return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"]
758
-
759
- # Check if file is in a main content folder and use appropriate schema
760
- if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS:
761
- return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name]
762
-
763
- return None
764
-
765
def _clean_ignorable_namespaces(self, xml_doc):
    """Return a copy of ``xml_doc`` stripped of non-OOXML attributes and elements.

    Namespaced attributes and elements whose namespace is not listed in
    ``OOXML_NAMESPACES`` are dropped. The input document is not modified;
    a new ElementTree is returned.
    """
    # Round-trip through a string so the caller's tree stays untouched
    clone = lxml.etree.fromstring(lxml.etree.tostring(xml_doc, encoding="unicode"))
    allowed = self.OOXML_NAMESPACES

    for node in clone.iter():
        # Collect first, delete afterwards: attrib must not change while
        # it is being iterated. Un-namespaced attributes are always kept.
        foreign = [
            key
            for key in node.attrib
            if "{" in key and key.split("}")[0][1:] not in allowed
        ]
        for key in foreign:
            del node.attrib[key]

    # Elements from foreign namespaces go too, along with their subtrees
    self._remove_ignorable_elements(clone)

    return lxml.etree.ElementTree(clone)
790
-
791
- def _remove_ignorable_elements(self, root):
792
- """Recursively remove all elements not in allowed namespaces."""
793
- elements_to_remove = []
794
-
795
- # Find elements to remove
796
- for elem in list(root):
797
- # Skip non-element nodes (comments, processing instructions, etc.)
798
- if not hasattr(elem, "tag") or callable(elem.tag):
799
- continue
800
-
801
- tag_str = str(elem.tag)
802
- if tag_str.startswith("{"):
803
- ns = tag_str.split("}")[0][1:]
804
- if ns not in self.OOXML_NAMESPACES:
805
- elements_to_remove.append(elem)
806
- continue
807
-
808
- # Recursively clean child elements
809
- self._remove_ignorable_elements(elem)
810
-
811
- # Remove collected elements
812
- for elem in elements_to_remove:
813
- root.remove(elem)
814
-
815
- def _preprocess_for_mc_ignorable(self, xml_doc):
816
- """Preprocess XML to handle mc:Ignorable attribute properly."""
817
- # Remove mc:Ignorable attributes before validation
818
- root = xml_doc.getroot()
819
-
820
- # Remove mc:Ignorable attribute from root
821
- if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib:
822
- del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"]
823
-
824
- return xml_doc
825
-
826
def _validate_single_file_xsd(self, xml_file, base_path):
    """Validate one XML file against the XSD chosen by ``_get_schema_path``.

    Before validation, template tags are stripped from text nodes and the
    root ``mc:Ignorable`` attribute is removed. For files under a main
    content folder, content from non-OOXML namespaces is removed as well.

    Args:
        xml_file: Path of the file to validate.
        base_path: Directory ``xml_file`` is relative to (package root).

    Returns:
        tuple: ``(None, None)`` when no schema applies, ``(True, set())``
        when valid, otherwise ``(False, errors)`` with a set of messages.
        Any exception raised while loading or validating is reported as a
        single error message rather than propagated.
    """
    schema_path = self._get_schema_path(xml_file)
    if not schema_path:
        return None, None  # Skip file

    try:
        # Load schema; base_url lets the schema's relative references
        # resolve against the schema file's own location
        with open(schema_path, "rb") as xsd_file:
            parser = lxml.etree.XMLParser()
            xsd_doc = lxml.etree.parse(
                xsd_file, parser=parser, base_url=str(schema_path)
            )
            schema = lxml.etree.XMLSchema(xsd_doc)

        # Read as bytes so the parser applies the encoding declared in the
        # XML itself. Text mode would decode with the platform's locale
        # encoding first, which corrupts or rejects UTF-8 content on
        # systems whose default is not UTF-8.
        with open(xml_file, "rb") as f:
            xml_doc = lxml.etree.parse(f)

        xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
        xml_doc = self._preprocess_for_mc_ignorable(xml_doc)

        # Clean ignorable namespaces if needed
        relative_path = xml_file.relative_to(base_path)
        if (
            relative_path.parts
            and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
        ):
            xml_doc = self._clean_ignorable_namespaces(xml_doc)

        # Validate
        if schema.validate(xml_doc):
            return True, set()

        # Messages only (no line numbers), so the same error compares
        # equal between the original and the edited file
        return False, {error.message for error in schema.error_log}

    except Exception as e:
        return False, {str(e)}
868
-
869
- def _get_original_file_errors(self, xml_file):
870
- """Get XSD validation errors from a single file in the original document.
871
-
872
- Args:
873
- xml_file: Path to the XML file in unpacked_dir to check
874
-
875
- Returns:
876
- set: Set of error messages from the original file
877
- """
878
- import tempfile
879
- import zipfile
880
-
881
- # Resolve both paths to handle symlinks (e.g., /var vs /private/var on macOS)
882
- xml_file = Path(xml_file).resolve()
883
- unpacked_dir = self.unpacked_dir.resolve()
884
- relative_path = xml_file.relative_to(unpacked_dir)
885
-
886
- with tempfile.TemporaryDirectory() as temp_dir:
887
- temp_path = Path(temp_dir)
888
-
889
- # Extract original file
890
- with zipfile.ZipFile(self.original_file, "r") as zip_ref:
891
- zip_ref.extractall(temp_path)
892
-
893
- # Find corresponding file in original
894
- original_xml_file = temp_path / relative_path
895
-
896
- if not original_xml_file.exists():
897
- # File didn't exist in original, so no original errors
898
- return set()
899
-
900
- # Validate the specific file in original
901
- is_valid, errors = self._validate_single_file_xsd(
902
- original_xml_file, temp_path
903
- )
904
- return errors if errors else set()
905
-
906
def _remove_template_tags_from_text_nodes(self, xml_doc):
    """Strip ``{{ ... }}`` template tags from text nodes before XSD validation.

    Works on a copy of the document. Both ``text`` and ``tail`` of every
    element are cleaned, except for elements whose local name is ``t``,
    which are left untouched. One warning is recorded per tag found.

    Returns:
        tuple: (cleaned_xml_doc, warnings_list)
    """
    warnings = []
    tag_re = re.compile(r"\{\{[^}]*\}\}")

    # Round-trip through a string so the caller's document is not modified
    clone = lxml.etree.fromstring(lxml.etree.tostring(xml_doc, encoding="unicode"))

    def strip_tags(text, content_type):
        # None and empty strings pass through unchanged
        if not text:
            return text
        found = tag_re.findall(text)
        if not found:
            return text
        warnings.extend(
            f"Found template tag in {content_type}: {tag}" for tag in found
        )
        return tag_re.sub("", text)

    for node in clone.iter():
        # Comments and processing instructions have a callable tag
        if not hasattr(node, "tag") or callable(node.tag):
            continue

        # Text-run elements ("t", with or without a namespace) are skipped
        qualified = str(node.tag)
        if qualified == "t" or qualified.endswith("}t"):
            continue

        node.text = strip_tags(node.text, "text content")
        node.tail = strip_tags(node.tail, "tail content")

    return lxml.etree.ElementTree(clone), warnings
948
-
949
-
950
# This module only provides the base class; executing it as a script is
# rejected with an explicit error.
if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")
1
+ """
2
+ Base validator with common validation logic for document files.
3
+ """
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ import lxml.etree
9
+
10
+
11
+ class BaseSchemaValidator:
12
+ """Base validator with common validation logic for document files."""
13
+
14
# Elements whose 'id' attributes must be unique within their file
# Format: element_name -> (attribute_name, scope)
# scope can be 'file' (unique within file) or 'global' (unique across all files)
# Keys and attribute names are lower-case: validate_unique_ids lower-cases the
# local element/attribute names before looking them up here.
UNIQUE_ID_REQUIREMENTS = {
    # Word elements
    "comment": ("id", "file"),  # Comment IDs in comments.xml
    "commentrangestart": ("id", "file"),  # Must match comment IDs
    "commentrangeend": ("id", "file"),  # Must match comment IDs
    "bookmarkstart": ("id", "file"),  # Bookmark start IDs
    "bookmarkend": ("id", "file"),  # Bookmark end IDs
    # Note: ins and del (track changes) can share IDs when part of same revision
    # PowerPoint elements
    "sldid": ("id", "file"),  # Slide IDs in presentation.xml
    "sldmasterid": ("id", "global"),  # Slide master IDs must be globally unique
    "sldlayoutid": ("id", "global"),  # Slide layout IDs must be globally unique
    # NOTE(review): this requires authorid to be unique across <cm> elements in a
    # file - confirm that two comments by the same author cannot share it.
    "cm": ("authorid", "file"),  # Comment author IDs
    # Excel elements
    "sheet": ("sheetid", "file"),  # Sheet IDs in workbook.xml
    "definedname": ("id", "file"),  # Named range IDs
    # Drawing/Shape elements (all formats)
    "cxnsp": ("id", "file"),  # Connection shape IDs
    "sp": ("id", "file"),  # Shape IDs
    "pic": ("id", "file"),  # Picture IDs
    "grpsp": ("id", "file"),  # Group shape IDs
}

# Mapping of element names to expected relationship types
# Subclasses should override this with format-specific mappings
# While this is empty, validate_all_relationship_ids skips type checking.
ELEMENT_RELATIONSHIP_TYPES = {}

# Unified schema mappings for all Office document types
# Values are paths relative to self.schemas_dir. Keys are matched by
# _get_schema_path against file names, the ".rels" suffix, the "chart"/"theme"
# categories, or the name of the containing main-content folder.
# NOTE(review): "fouth-edition" looks like a misspelling of "fourth-edition";
# presumably it matches the directory name on disk - verify before changing.
SCHEMA_MAPPINGS = {
    # Document type specific schemas
    "word": "ISO-IEC29500-4_2016/wml.xsd",  # Word documents
    "ppt": "ISO-IEC29500-4_2016/pml.xsd",  # PowerPoint presentations
    "xl": "ISO-IEC29500-4_2016/sml.xsd",  # Excel spreadsheets
    # Common file types
    "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
    "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
    "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
    "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
    ".rels": "ecma/fouth-edition/opc-relationships.xsd",
    # Word-specific files
    "people.xml": "microsoft/wml-2012.xsd",
    "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
    "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
    "commentsExtended.xml": "microsoft/wml-2012.xsd",
    # Chart files (common across document types)
    "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
    # Theme files (common across document types)
    "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
    # Drawing and media files
    "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
}

# Unified namespace constants
MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

# Common OOXML namespaces used across validators
PACKAGE_RELATIONSHIPS_NAMESPACE = (
    "http://schemas.openxmlformats.org/package/2006/relationships"
)
OFFICE_RELATIONSHIPS_NAMESPACE = (
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
)
CONTENT_TYPES_NAMESPACE = (
    "http://schemas.openxmlformats.org/package/2006/content-types"
)

# Folders where we should clean ignorable namespaces
# (also used by _get_schema_path to pick a schema by folder name)
MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}

# All allowed OOXML namespaces (superset of all document types)
# Namespaced attributes/elements outside this set are stripped before XSD
# validation of files in the main content folders.
OOXML_NAMESPACES = {
    "http://schemas.openxmlformats.org/officeDocument/2006/math",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
    "http://schemas.openxmlformats.org/drawingml/2006/main",
    "http://schemas.openxmlformats.org/drawingml/2006/chart",
    "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
    "http://schemas.openxmlformats.org/drawingml/2006/diagram",
    "http://schemas.openxmlformats.org/drawingml/2006/picture",
    "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
    "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "http://schemas.openxmlformats.org/presentationml/2006/main",
    "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
    "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
    "http://www.w3.org/XML/1998/namespace",
}
105
+
106
+ def __init__(self, unpacked_dir, original_file, verbose=False):
107
+ self.unpacked_dir = Path(unpacked_dir).resolve()
108
+ self.original_file = Path(original_file)
109
+ self.verbose = verbose
110
+
111
+ # Set schemas directory
112
+ self.schemas_dir = Path(__file__).parent.parent.parent / "schemas"
113
+
114
+ # Get all XML and .rels files
115
+ patterns = ["*.xml", "*.rels"]
116
+ self.xml_files = [
117
+ f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)
118
+ ]
119
+
120
+ if not self.xml_files:
121
+ print(f"Warning: No XML files found in {self.unpacked_dir}")
122
+
123
def validate(self):
    """Run the full set of checks; True means every check passed.

    The base class provides no implementation, so calling it directly
    always raises NotImplementedError.
    """
    message = "Subclasses must implement the validate method"
    raise NotImplementedError(message)
126
+
127
def validate_xml(self):
    """Check that every collected XML/.rels file parses as well-formed XML.

    Returns:
        bool: True when all files parse; otherwise the problems are printed
        and False is returned.
    """
    problems = []

    for path in self.xml_files:
        try:
            lxml.etree.parse(str(path))
        except lxml.etree.XMLSyntaxError as syntax_error:
            detail = f"Line {syntax_error.lineno}: {syntax_error.msg}"
        except Exception as other_error:
            detail = f"Unexpected error: {str(other_error)}"
        else:
            continue  # Parsed cleanly
        problems.append(f" {path.relative_to(self.unpacked_dir)}: {detail}")

    if not problems:
        if self.verbose:
            print("PASSED - All XML files are well-formed")
        return True

    print(f"FAILED - Found {len(problems)} XML violations:")
    for problem in problems:
        print(problem)
    return False
155
+
156
def validate_namespaces(self):
    """Check that prefixes listed in root ``Ignorable`` attributes are declared.

    For each file, every whitespace-separated token in a root attribute whose
    name ends with "Ignorable" must be a prefix declared on the root element.
    Files that fail to parse are skipped here.

    Returns:
        bool: True when no undeclared prefix was found.
    """
    issues = []

    for path in self.xml_files:
        try:
            root = lxml.etree.parse(str(path)).getroot()
        except lxml.etree.XMLSyntaxError:
            continue

        # The default namespace has no prefix (key None) and is excluded
        prefixes = set(root.nsmap.keys()) - {None}

        for attr_name, attr_val in root.attrib.items():
            if not attr_name.endswith("Ignorable"):
                continue
            for ns in set(attr_val.split()) - prefixes:
                issues.append(
                    f" {path.relative_to(self.unpacked_dir)}: "
                    f"Namespace '{ns}' in Ignorable but not declared"
                )

    if issues:
        print(f"FAILED - {len(issues)} namespace issues:")
        for issue in issues:
            print(issue)
        return False

    if self.verbose:
        print("PASSED - All namespace prefixes properly declared")
    return True
185
+
186
def validate_unique_ids(self):
    """Validate that specific IDs are unique according to OOXML requirements.

    ``UNIQUE_ID_REQUIREMENTS`` maps a lower-cased element name to the
    attribute that must be unique and the scope of that uniqueness: "file"
    (per element type within one file) or "global" (across all files).
    Content inside ``mc:AlternateContent`` is ignored.

    Returns:
        bool: True when no duplicate was found; otherwise the violations are
        printed and False is returned.
    """
    errors = []
    global_ids = {}  # id value -> (file, line, tag) of its first occurrence

    for xml_file in self.xml_files:
        rel_path = xml_file.relative_to(self.unpacked_dir)
        try:
            root = lxml.etree.parse(str(xml_file)).getroot()
            file_ids = {}  # (tag, attribute) -> {id value: first line}

            # Remove all mc:AlternateContent elements from the tree
            for elem in root.xpath(
                ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
            ):
                elem.getparent().remove(elem)

            # Now check IDs in the cleaned tree
            for elem in root.iter():
                # iter() also yields comments and processing instructions,
                # whose tag is a callable rather than a string; without this
                # guard a single XML comment makes the whole file fail.
                if not hasattr(elem, "tag") or callable(elem.tag):
                    continue

                # Element name without namespace
                tag = elem.tag.split("}")[-1].lower()
                if tag not in self.UNIQUE_ID_REQUIREMENTS:
                    continue
                attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]

                # First attribute whose local name matches, ignoring case
                id_value = next(
                    (
                        value
                        for attr, value in elem.attrib.items()
                        if attr.split("}")[-1].lower() == attr_name
                    ),
                    None,
                )
                if id_value is None:
                    continue

                if scope == "global":
                    # Check global uniqueness
                    if id_value in global_ids:
                        prev_file, prev_line, prev_tag = global_ids[id_value]
                        errors.append(
                            f" {rel_path}: "
                            f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
                            f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
                        )
                    else:
                        global_ids[id_value] = (rel_path, elem.sourceline, tag)
                elif scope == "file":
                    # Check file-level uniqueness
                    seen = file_ids.setdefault((tag, attr_name), {})
                    if id_value in seen:
                        errors.append(
                            f" {rel_path}: "
                            f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
                            f"(first occurrence at line {seen[id_value]})"
                        )
                    else:
                        seen[id_value] = elem.sourceline

        except Exception as e:
            # Parse failures and any other error are reported per file
            errors.append(f" {rel_path}: Error: {e}")

    if errors:
        print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
        for error in errors:
            print(error)
        return False

    if self.verbose:
        print("PASSED - All required IDs are unique")
    return True
276
+
277
def validate_file_references(self):
    """Validate relationship targets and detect unreferenced files.

    Every internal ``Relationship`` target in every .rels file must resolve
    to an existing file, and every file in the package (other than
    [Content_Types].xml and the .rels files themselves) must be the target
    of at least one relationship.

    Returns:
        bool: True when all references resolve and no file is orphaned;
        otherwise the problems are printed and False is returned.
    """
    errors = []

    # Find all .rels files
    rels_files = list(self.unpacked_dir.rglob("*.rels"))

    if not rels_files:
        if self.verbose:
            print("PASSED - No .rels files found")
        return True

    # All files that are expected to be a relationship target
    all_files = [
        file_path.resolve()
        for file_path in self.unpacked_dir.rglob("*")
        if file_path.is_file()
        and file_path.name != "[Content_Types].xml"
        and not file_path.name.endswith(".rels")
    ]

    # Track all files that are referenced by any .rels file
    all_referenced_files = set()

    if self.verbose:
        print(
            f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
        )

    for rels_file in rels_files:
        try:
            rels_root = lxml.etree.parse(str(rels_file)).getroot()
            rels_dir = rels_file.parent
            broken_refs = []

            for rel in rels_root.findall(
                ".//ns:Relationship",
                namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
            ):
                target = rel.get("Target")
                if not target:
                    continue

                # External relationships point outside the package and are
                # not files to look for. TargetMode is the authoritative
                # marker; the prefix test is kept for relationships that
                # omit it.
                if rel.get("TargetMode") == "External" or target.startswith(
                    ("http", "mailto:")
                ):
                    continue

                if target.startswith("/"):
                    # Absolute target: relative to the package root, not
                    # to the filesystem root
                    target_path = self.unpacked_dir / target.lstrip("/")
                elif rels_file.name == ".rels":
                    # Root .rels file - targets are relative to unpacked_dir
                    target_path = self.unpacked_dir / target
                else:
                    # Other .rels files - targets are relative to their parent's parent
                    # e.g., word/_rels/document.xml.rels -> targets relative to word/
                    target_path = rels_dir.parent / target

                # Normalize the path and check if it exists
                try:
                    target_path = target_path.resolve()
                    if target_path.exists() and target_path.is_file():
                        all_referenced_files.add(target_path)
                    else:
                        broken_refs.append((target, rel.sourceline))
                except (OSError, ValueError):
                    broken_refs.append((target, rel.sourceline))

            # Report broken references
            if broken_refs:
                rel_path = rels_file.relative_to(self.unpacked_dir)
                for broken_ref, line_num in broken_refs:
                    errors.append(
                        f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
                    )

        except Exception as e:
            rel_path = rels_file.relative_to(self.unpacked_dir)
            errors.append(f" Error parsing {rel_path}: {e}")

    # Check for unreferenced files (files that exist but are not referenced anywhere)
    unreferenced_files = set(all_files) - all_referenced_files

    for unref_file in sorted(unreferenced_files):
        unref_rel_path = unref_file.relative_to(self.unpacked_dir)
        errors.append(f" Unreferenced file: {unref_rel_path}")

    if errors:
        print(f"FAILED - Found {len(errors)} relationship validation errors:")
        for error in errors:
            print(error)
        print(
            "CRITICAL: These errors will cause the document to appear corrupt. "
            + "Broken references MUST be fixed, "
            + "and unreferenced files MUST be referenced or removed."
        )
        return False

    if self.verbose:
        print(
            "PASSED - All references are valid and all files are properly referenced"
        )
    return True
387
+
388
def validate_all_relationship_ids(self):
    """
    Check every r:id attribute against the relationships file of its part.

    For each XML part that has a sibling ``_rels/<name>.rels`` file, the
    relationship IDs declared there are collected (duplicate IDs are
    reported) and every element carrying an ``r:id`` attribute must use one
    of them. When ``ELEMENT_RELATIONSHIP_TYPES`` is non-empty, the type
    behind the referenced ID is also compared with the type expected for
    the referencing element.

    Returns:
        bool: True when nothing was found, False otherwise. Findings are
        printed; the success line is printed only when ``self.verbose``.
    """
    import lxml.etree

    problems = []

    for part in self.xml_files:
        # Relationship files are the lookup tables, not the reference holders.
        if part.suffix == ".rels":
            continue

        # dir/file.xml keeps its relationships in dir/_rels/file.xml.rels;
        # a part without such a file has nothing to verify.
        part_rels = part.parent / "_rels" / f"{part.name}.rels"
        if not part_rels.exists():
            continue

        try:
            # Map each declared relationship ID to the short name of its type.
            rels_tree_root = lxml.etree.parse(str(part_rels)).getroot()
            known_ids = {}

            for relationship in rels_tree_root.findall(
                f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
            ):
                rid = relationship.get("Id")
                full_type = relationship.get("Type", "")
                if not rid:
                    continue

                if rid in known_ids:
                    shown_rels = part_rels.relative_to(self.unpacked_dir)
                    problems.append(
                        f" {shown_rels}: Line {relationship.sourceline}: "
                        f"Duplicate relationship ID '{rid}' (IDs must be unique)"
                    )

                # Only the last segment of the type URL is kept; a later
                # duplicate overwrites the earlier entry.
                known_ids[rid] = full_type.rsplit("/", 1)[-1]

            # Walk the part itself and verify each r:id it uses.
            part_root = lxml.etree.parse(str(part)).getroot()

            for node in part_root.iter():
                ref = node.get(f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id")
                if not ref:
                    continue

                shown_part = part.relative_to(self.unpacked_dir)
                local_name = node.tag.rsplit("}", 1)[-1]

                if ref not in known_ids:
                    # Show at most five valid IDs as a hint.
                    sample = ", ".join(sorted(known_ids.keys())[:5])
                    more = "..." if len(known_ids) > 5 else ""
                    problems.append(
                        f" {shown_part}: Line {node.sourceline}: "
                        f"<{local_name}> references non-existent relationship '{ref}' "
                        f"(valid IDs: {sample}{more})"
                    )
                    continue

                # Type checking only happens when a mapping is configured.
                if not self.ELEMENT_RELATIONSHIP_TYPES:
                    continue

                wanted = self._get_expected_relationship_type(local_name)
                if not wanted:
                    continue

                found = known_ids[ref]
                # A substring match is enough: the expected type must appear
                # somewhere in the lower-cased actual type name.
                if wanted not in found.lower():
                    problems.append(
                        f" {shown_part}: Line {node.sourceline}: "
                        f"<{local_name}> references '{ref}' which points to '{found}' "
                        f"but should point to a '{wanted}' relationship"
                    )

        except Exception as e:
            shown_part = part.relative_to(self.unpacked_dir)
            problems.append(f" Error processing {shown_part}: {e}")

    if not problems:
        if self.verbose:
            print("PASSED - All relationship ID references are valid")
        return True

    print(f"FAILED - Found {len(problems)} relationship ID reference errors:")
    for line in problems:
        print(line)
    print("\nThese ID mismatches will cause the document to appear corrupt!")
    return False
485
+
486
def _get_expected_relationship_type(self, element_name):
    """
    Return the relationship type an element is expected to point at.

    The lower-cased element name is looked up in
    ``ELEMENT_RELATIONSHIP_TYPES`` first. Failing that, two naming patterns
    are tried: ``<prefix>Id`` and ``<prefix>Reference`` both yield the
    lower-cased prefix, except that ``sldId`` yields ``"slide"``.

    Returns:
        The expected type name, or None when nothing applies.
    """
    key = element_name.lower()

    # An explicit mapping always wins over pattern detection.
    explicit = self.ELEMENT_RELATIONSHIP_TYPES
    if key in explicit:
        return explicit[key]

    # The suffix must leave a non-empty prefix, so a bare "id" or
    # "reference" falls through to None.
    for suffix in ("id", "reference"):
        if key.endswith(suffix) and len(key) > len(suffix):
            stem = key[: -len(suffix)]
            # "sldId" is the one abbreviation that is expanded.
            if suffix == "id" and stem == "sld":
                return "slide"
            return stem

    return None
521
+
522
def validate_content_types(self):
    """Check that content parts and media files are declared in [Content_Types].xml.

    XML parts whose root element is one of a fixed set of names must have an
    Override entry; non-XML files with a known media extension must have a
    Default entry for that extension.

    Returns:
        bool: True when nothing is missing, False otherwise (including when
        [Content_Types].xml is absent or cannot be processed).
    """
    problems = []

    manifest = self.unpacked_dir / "[Content_Types].xml"
    if not manifest.exists():
        print("FAILED - [Content_Types].xml file not found")
        return False

    try:
        manifest_root = lxml.etree.parse(str(manifest)).getroot()

        # Override entries name specific parts; the leading "/" is dropped
        # so they compare against package-relative paths.
        override_names = (
            node.get("PartName")
            for node in manifest_root.findall(
                f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override"
            )
        )
        declared_parts = {
            name.lstrip("/") for name in override_names if name is not None
        }

        # Default entries declare a content type per file extension.
        default_extensions = (
            node.get("Extension")
            for node in manifest_root.findall(
                f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default"
            )
        )
        declared_extensions = {
            ext.lower() for ext in default_extensions if ext is not None
        }

        # Root element names whose parts need an Override declaration.
        declarable_roots = {
            "sld",
            "sldLayout",
            "sldMaster",
            "presentation",  # PowerPoint
            "document",  # Word
            "workbook",
            "worksheet",  # Excel
            "theme",  # Common
        }

        # Media extensions that need a Default declaration, with the
        # content type suggested in the error message.
        media_extensions = {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif",
            "bmp": "image/bmp",
            "tiff": "image/tiff",
            "wmf": "image/x-wmf",
            "emf": "image/x-emf",
        }

        package_files = [
            entry for entry in self.unpacked_dir.rglob("*") if entry.is_file()
        ]

        # Pass 1: XML parts that need an Override entry.
        non_content_markers = (".rels", "[Content_Types]", "docProps/", "_rels/")
        for part in self.xml_files:
            path_str = str(part.relative_to(self.unpacked_dir)).replace("\\", "/")

            if any(marker in path_str for marker in non_content_markers):
                continue

            try:
                qualified = lxml.etree.parse(str(part)).getroot().tag
                root_name = qualified.rsplit("}", 1)[-1]

                if root_name in declarable_roots and path_str not in declared_parts:
                    problems.append(
                        f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml"
                    )

            except Exception:
                # Files that cannot be parsed are skipped by this check.
                continue

        # Pass 2: non-XML files that need a Default extension entry.
        for candidate in package_files:
            if candidate.suffix.lower() in {".xml", ".rels"}:
                continue
            if "_rels" in candidate.parts or "docProps" in candidate.parts:
                continue

            ext = candidate.suffix.lstrip(".").lower()
            # Only known media extensions are reported; others are ignored.
            if ext in media_extensions and ext not in declared_extensions:
                shown = candidate.relative_to(self.unpacked_dir)
                mime = media_extensions[ext]
                problems.append(
                    f' {shown}: File with extension \'{ext}\' not declared in [Content_Types].xml - should add: <Default Extension="{ext}" ContentType="{mime}"/>'
                )

    except Exception as e:
        problems.append(f" Error parsing [Content_Types].xml: {e}")

    if not problems:
        if self.verbose:
            print(
                "PASSED - All content files are properly declared in [Content_Types].xml"
            )
        return True

    print(f"FAILED - Found {len(problems)} content type declaration errors:")
    for line in problems:
        print(line)
    return False
640
+
641
def validate_file_against_xsd(self, xml_file, verbose=False):
    """Validate a single XML file against XSD schema, comparing with original.

    Errors that the same file already produces in the original document
    are ignored; only errors that are new count as a failure.

    Args:
        xml_file: Path to XML file to validate
        verbose: Enable verbose output

    Returns:
        tuple: (is_valid, new_errors_set) where is_valid is True/False/None (skipped).
        new_errors_set is non-empty only when is_valid is False.
    """
    # Resolve both paths to handle symlinks
    xml_file = Path(xml_file).resolve()
    unpacked_dir = self.unpacked_dir.resolve()

    is_valid, current_errors = self._validate_single_file_xsd(
        xml_file, unpacked_dir
    )

    if is_valid is None:
        return None, set()  # Skipped
    if is_valid:
        return True, set()  # Valid, no errors

    # Errors the same file already had in the original document.
    original_errors = self._get_original_file_errors(xml_file)

    # A failed validation always comes with an error set.
    assert current_errors is not None
    new_errors = current_errors - original_errors

    if not new_errors:
        # All errors existed in original
        if verbose:
            print(
                f"PASSED - No new errors (original had {len(current_errors)} errors)"
            )
        return True, set()

    if verbose:
        relative_path = xml_file.relative_to(unpacked_dir)
        print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)")
        # Sets have no stable order; sort so the same three errors are
        # shown on every run.
        for error in sorted(new_errors)[:3]:
            truncated = error[:250] + "..." if len(error) > 250 else error
            print(f" - {truncated}")
    return False, new_errors
687
+
688
def validate_against_xsd(self):
    """Validate XML files against XSD schemas, showing only new errors compared to original.

    Every file in ``self.xml_files`` is checked through
    ``validate_file_against_xsd``. Files without a schema are counted as
    skipped; files whose errors are all inherited count as valid.

    Returns:
        bool: True when no file has new errors, False otherwise. The
        per-category summary is printed only when ``self.verbose`` is set;
        new errors are always printed.
    """
    new_errors = []
    original_error_count = 0
    valid_count = 0
    skipped_count = 0
    # Counted explicitly instead of being recovered from the formatted
    # report lines, so the summary does not depend on their layout.
    files_with_new_errors = 0

    for xml_file in self.xml_files:
        relative_path = str(xml_file.relative_to(self.unpacked_dir))
        is_valid, new_file_errors = self.validate_file_against_xsd(
            xml_file, verbose=False
        )

        if is_valid is None:
            skipped_count += 1
            continue

        if is_valid:
            valid_count += 1
            # A valid result that still carries errors means they all
            # existed in the original.
            # NOTE(review): validate_file_against_xsd, as written here,
            # returns an empty set whenever is_valid is True, so this
            # counter stays 0 unless that method is overridden - confirm.
            if new_file_errors:
                original_error_count += 1
            continue

        # Has new errors
        files_with_new_errors += 1
        new_errors.append(f" {relative_path}: {len(new_file_errors)} new error(s)")
        # Sort so the three errors shown are the same on every run.
        for error in sorted(new_file_errors)[:3]:
            new_errors.append(
                f" - {error[:250]}..." if len(error) > 250 else f" - {error}"
            )

    # Print summary
    if self.verbose:
        print(f"Validated {len(self.xml_files)} files:")
        print(f" - Valid: {valid_count}")
        print(f" - Skipped (no schema): {skipped_count}")
        if original_error_count:
            print(f" - With original errors (ignored): {original_error_count}")
        print(f" - With NEW errors: {files_with_new_errors}")

    if new_errors:
        print("\nFAILED - Found NEW validation errors:")
        for error in new_errors:
            print(error)
        return False

    if self.verbose:
        print("\nPASSED - No new XSD validation errors introduced")
    return True
740
+
741
def _get_schema_path(self, xml_file):
    """Determine the appropriate schema path for an XML file.

    Lookup order: exact file name, ``.rels`` suffix, chart files, theme
    files, then the name of the containing folder when it is one of
    ``MAIN_CONTENT_FOLDERS``.

    Args:
        xml_file: Path object of the file to classify.

    Returns:
        The schema path under ``self.schemas_dir``, or None when no schema
        applies to the file.
    """
    mappings = self.SCHEMA_MAPPINGS
    file_name = xml_file.name

    # Check exact filename match
    if file_name in mappings:
        return self.schemas_dir / mappings[file_name]

    # Check .rels files
    if xml_file.suffix == ".rels":
        return self.schemas_dir / mappings[".rels"]

    # Compare folder markers against a forward-slash path so the checks
    # also match on platforms whose native separator is a backslash.
    posix_path = xml_file.as_posix()

    # Check chart files
    if "charts/" in posix_path and file_name.startswith("chart"):
        return self.schemas_dir / mappings["chart"]

    # Check theme files
    if "theme/" in posix_path and file_name.startswith("theme"):
        return self.schemas_dir / mappings["theme"]

    # Check if file is in a main content folder and use appropriate schema
    folder = xml_file.parent.name
    if folder in self.MAIN_CONTENT_FOLDERS:
        return self.schemas_dir / mappings[folder]

    return None
764
+
765
def _clean_ignorable_namespaces(self, xml_doc):
    """Return a copy of the document without content from other namespaces.

    Namespaced attributes and elements whose namespace is not listed in
    ``OOXML_NAMESPACES`` are removed. The input document is left untouched;
    the work happens on a re-parsed serialization of it.

    Returns:
        A new ``lxml.etree.ElementTree`` wrapping the cleaned copy.
    """
    # Round-trip through text to get an independent copy.
    serialized = lxml.etree.tostring(xml_doc, encoding="unicode")
    working_root = lxml.etree.fromstring(serialized)

    for node in working_root.iter():
        # Collect first, delete afterwards, so the attribute mapping is not
        # modified while it is being iterated. Only namespaced attributes
        # (those written as "{ns}name") are candidates.
        foreign = [
            name
            for name in node.attrib
            if "{" in name
            and name.split("}")[0][1:] not in self.OOXML_NAMESPACES
        ]
        for name in foreign:
            del node.attrib[name]

    # Elements are handled by the recursive helper.
    self._remove_ignorable_elements(working_root)

    return lxml.etree.ElementTree(working_root)
790
+
791
def _remove_ignorable_elements(self, root):
    """Drop, in place, every descendant whose namespace is not allowed.

    A child in a namespace outside ``OOXML_NAMESPACES`` is removed together
    with its subtree. Children that are kept are cleaned recursively.
    Children without a namespace are always kept.
    """
    rejected = []

    # Iterate over a snapshot; removal is deferred until after the walk.
    for child in list(root):
        # Comments and processing instructions expose a callable tag.
        if not hasattr(child, "tag") or callable(child.tag):
            continue

        qualified = str(child.tag)
        is_foreign = (
            qualified.startswith("{")
            and qualified.split("}")[0][1:] not in self.OOXML_NAMESPACES
        )

        if is_foreign:
            # No need to descend: the whole subtree goes away.
            rejected.append(child)
        else:
            self._remove_ignorable_elements(child)

    for child in rejected:
        root.remove(child)
814
+
815
def _preprocess_for_mc_ignorable(self, xml_doc):
    """Strip the mc:Ignorable attribute from the document's root element.

    Only the root element is touched; the same attribute on descendants is
    left alone. The document is modified in place and returned.
    """
    root_attributes = xml_doc.getroot().attrib
    # Removing an absent attribute is a no-op.
    root_attributes.pop(f"{{{self.MC_NAMESPACE}}}Ignorable", None)
    return xml_doc
825
+
826
def _validate_single_file_xsd(self, xml_file, base_path):
    """Validate a single XML file against XSD schema. Returns (is_valid, errors_set).

    Args:
        xml_file: Path of the XML file to validate.
        base_path: Root directory that xml_file lives under; used to decide
            whether the file sits in one of ``MAIN_CONTENT_FOLDERS``.

    Returns:
        tuple: ``(None, None)`` when no schema applies to the file,
        ``(True, set())`` when it validates, otherwise ``(False, errors)``
        where errors is a set of message strings. Any exception raised
        while loading or validating is reported as a single error message.
    """
    schema_path = self._get_schema_path(xml_file)
    if not schema_path:
        return None, None  # Skip file

    try:
        # Load schema; base_url is set to the schema's own location.
        with open(schema_path, "rb") as xsd_file:
            parser = lxml.etree.XMLParser()
            xsd_doc = lxml.etree.parse(
                xsd_file, parser=parser, base_url=str(schema_path)
            )
            schema = lxml.etree.XMLSchema(xsd_doc)

        # Read the XML as bytes so the parser applies the encoding declared
        # in the document itself instead of the platform's default text
        # encoding.
        with open(xml_file, "rb") as f:
            xml_doc = lxml.etree.parse(f)

        # Template placeholders and mc:Ignorable are removed before validation.
        xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
        xml_doc = self._preprocess_for_mc_ignorable(xml_doc)

        # Content from other namespaces is stripped only for files inside a
        # main content folder.
        relative_path = xml_file.relative_to(base_path)
        if (
            relative_path.parts
            and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
        ):
            xml_doc = self._clean_ignorable_namespaces(xml_doc)

        if schema.validate(xml_doc):
            return True, set()

        # Only the message text is kept (no line numbers), so the same
        # error can be matched between two versions of a file.
        return False, {error.message for error in schema.error_log}

    except Exception as e:
        return False, {str(e)}
868
+
869
def _get_original_file_errors(self, xml_file):
    """Get XSD validation errors from a single file in the original document.

    Only the matching archive member is extracted, into a temporary
    directory that is deleted before returning.

    Args:
        xml_file: Path to the XML file in unpacked_dir to check

    Returns:
        set: Set of error messages from the original file; empty when the
        file is not in the original archive or produced no errors.
    """
    import tempfile
    import zipfile

    # Resolve both paths to handle symlinks (e.g., /var vs /private/var on macOS)
    xml_file = Path(xml_file).resolve()
    unpacked_dir = self.unpacked_dir.resolve()
    relative_path = xml_file.relative_to(unpacked_dir)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        # Extract just the one member. Archive member names always use
        # forward slashes, hence as_posix().
        with zipfile.ZipFile(self.original_file, "r") as zip_ref:
            try:
                zip_ref.extract(relative_path.as_posix(), temp_path)
            except KeyError:
                # File didn't exist in original, so no original errors
                return set()

        original_xml_file = temp_path / relative_path

        if not original_xml_file.exists():
            return set()

        # The validity flag is not needed here, only the messages.
        _, errors = self._validate_single_file_xsd(original_xml_file, temp_path)
        return errors if errors else set()
905
+
906
def _remove_template_tags_from_text_nodes(self, xml_doc):
    """Strip ``{{ ... }}`` placeholders from text and tail content.

    The document is copied first, so the input is not modified. Every
    element's text and tail are cleaned, except for elements whose local
    name is ``t``, which are left exactly as they are. Each placeholder
    that is removed is recorded as a warning.

    Returns:
        tuple: (cleaned_xml_doc, warnings_list)
    """
    warnings = []
    template_pattern = re.compile(r"\{\{[^}]*\}\}")

    # Round-trip through text to get an independent copy.
    serialized = lxml.etree.tostring(xml_doc, encoding="unicode")
    working_root = lxml.etree.fromstring(serialized)

    def strip_tags(text, content_type):
        # Empty or missing text is handed back unchanged.
        if not text:
            return text
        found = template_pattern.findall(text)
        if not found:
            return text
        warnings.extend(
            f"Found template tag in {content_type}: {tag}" for tag in found
        )
        return template_pattern.sub("", text)

    for node in working_root.iter():
        # Comments and processing instructions expose a callable tag.
        if not hasattr(node, "tag") or callable(node.tag):
            continue

        # Elements named "t" (namespaced or not) are not processed.
        qualified = str(node.tag)
        if qualified.endswith("}t") or qualified == "t":
            continue

        node.text = strip_tags(node.text, "text content")
        node.tail = strip_tags(node.tail, "tail content")

    return lxml.etree.ElementTree(working_root), warnings
948
+
949
+
950
# This module is meant to be imported; running it as a script is rejected
# with an explicit error.
if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")