@kortix/sandbox 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/config/customize.sh +143 -0
  2. package/config/kortix-env-setup.sh +25 -0
  3. package/kortix-master/package.json +22 -0
  4. package/kortix-master/src/config.ts +22 -0
  5. package/kortix-master/src/index.ts +44 -0
  6. package/kortix-master/src/routes/env.ts +65 -0
  7. package/kortix-master/src/routes/proxy.ts +108 -0
  8. package/kortix-master/src/routes/update.ts +185 -0
  9. package/kortix-master/src/services/proxy.ts +43 -0
  10. package/kortix-master/src/services/secret-store.ts +156 -0
  11. package/kortix-master/tsconfig.json +14 -0
  12. package/opencode/agents/kortix-browser.md +142 -0
  13. package/opencode/agents/kortix-build.md +62 -0
  14. package/opencode/agents/kortix-explore.md +66 -0
  15. package/opencode/agents/kortix-image-gen.md +33 -0
  16. package/opencode/agents/kortix-main.md +450 -0
  17. package/opencode/agents/kortix-plan.md +100 -0
  18. package/opencode/agents/kortix-research.md +84 -0
  19. package/opencode/agents/kortix-sheets.md +61 -0
  20. package/opencode/agents/kortix-slides.md +64 -0
  21. package/opencode/agents/kortix-web-dev.md +572 -0
  22. package/opencode/commands/email.md +36 -0
  23. package/opencode/commands/init.md +43 -0
  24. package/opencode/commands/journal.md +44 -0
  25. package/opencode/commands/memory-init.md +81 -0
  26. package/opencode/commands/memory-search.md +50 -0
  27. package/opencode/commands/memory-status.md +56 -0
  28. package/opencode/commands/research.md +36 -0
  29. package/opencode/commands/search.md +38 -0
  30. package/opencode/commands/slides.md +32 -0
  31. package/opencode/commands/spreadsheet.md +30 -0
  32. package/opencode/memory.json +37 -0
  33. package/opencode/ocx.jsonc +10 -0
  34. package/opencode/opencode.jsonc +103 -0
  35. package/opencode/package.json +25 -0
  36. package/opencode/patches/apply.sh +19 -0
  37. package/opencode/patches/opencode-pty-spawn.txt +49 -0
  38. package/opencode/plugin/background-agents.ts.disabled +483 -0
  39. package/opencode/plugin/kdco-primitives/get-project-id.ts +172 -0
  40. package/opencode/plugin/kdco-primitives/index.ts +26 -0
  41. package/opencode/plugin/kdco-primitives/log-warn.ts +51 -0
  42. package/opencode/plugin/kdco-primitives/mutex.ts +122 -0
  43. package/opencode/plugin/kdco-primitives/shell.ts +138 -0
  44. package/opencode/plugin/kdco-primitives/temp.ts +36 -0
  45. package/opencode/plugin/kdco-primitives/terminal-detect.ts +34 -0
  46. package/opencode/plugin/kdco-primitives/types.ts +13 -0
  47. package/opencode/plugin/kdco-primitives/with-timeout.ts +84 -0
  48. package/opencode/plugin/memory.ts +306 -0
  49. package/opencode/plugin/worktree/state.ts +412 -0
  50. package/opencode/plugin/worktree/terminal.ts +1002 -0
  51. package/opencode/plugin/worktree.ts +861 -0
  52. package/opencode/skills/KORTIX-browser/SKILL.md +478 -0
  53. package/opencode/skills/KORTIX-cron-triggers/SKILL.md +173 -0
  54. package/opencode/skills/KORTIX-deep-research/SKILL.md +278 -0
  55. package/opencode/skills/KORTIX-docx/SKILL.md +398 -0
  56. package/opencode/skills/KORTIX-docx/scripts/__init__.py +1 -0
  57. package/opencode/skills/KORTIX-docx/scripts/accept_changes.py +104 -0
  58. package/opencode/skills/KORTIX-docx/scripts/comment.py +244 -0
  59. package/opencode/skills/KORTIX-docx/scripts/office/helpers/__init__.py +0 -0
  60. package/opencode/skills/KORTIX-docx/scripts/office/helpers/merge_runs.py +199 -0
  61. package/opencode/skills/KORTIX-docx/scripts/office/helpers/simplify_redlines.py +197 -0
  62. package/opencode/skills/KORTIX-docx/scripts/office/pack.py +159 -0
  63. package/opencode/skills/KORTIX-docx/scripts/office/soffice.py +183 -0
  64. package/opencode/skills/KORTIX-docx/scripts/office/unpack.py +132 -0
  65. package/opencode/skills/KORTIX-docx/scripts/office/validate.py +111 -0
  66. package/opencode/skills/KORTIX-docx/scripts/office/validators/__init__.py +15 -0
  67. package/opencode/skills/KORTIX-docx/scripts/office/validators/base.py +847 -0
  68. package/opencode/skills/KORTIX-docx/scripts/office/validators/docx.py +446 -0
  69. package/opencode/skills/KORTIX-docx/scripts/office/validators/pptx.py +275 -0
  70. package/opencode/skills/KORTIX-docx/scripts/office/validators/redlining.py +247 -0
  71. package/opencode/skills/KORTIX-docx/scripts/render_docx.py +179 -0
  72. package/opencode/skills/KORTIX-docx/scripts/templates/comments.xml +3 -0
  73. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtended.xml +3 -0
  74. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtensible.xml +3 -0
  75. package/opencode/skills/KORTIX-docx/scripts/templates/commentsIds.xml +3 -0
  76. package/opencode/skills/KORTIX-docx/scripts/templates/people.xml +3 -0
  77. package/opencode/skills/KORTIX-domain-research/SKILL.md +96 -0
  78. package/opencode/skills/KORTIX-domain-research/scripts/domain-lookup.py +810 -0
  79. package/opencode/skills/KORTIX-elevenlabs/SKILL.md +230 -0
  80. package/opencode/skills/KORTIX-elevenlabs/scripts/tts.py +389 -0
  81. package/opencode/skills/KORTIX-email/SKILL.md +145 -0
  82. package/opencode/skills/KORTIX-legal-writer/SKILL.md +409 -0
  83. package/opencode/skills/KORTIX-legal-writer/references/bluebook.md +152 -0
  84. package/opencode/skills/KORTIX-legal-writer/references/document-types.md +416 -0
  85. package/opencode/skills/KORTIX-legal-writer/scripts/courtlistener.py +291 -0
  86. package/opencode/skills/KORTIX-legal-writer/scripts/ecfr_lookup.py +299 -0
  87. package/opencode/skills/KORTIX-legal-writer/scripts/verify-legal.py +507 -0
  88. package/opencode/skills/KORTIX-logo-creator/SKILL.md +293 -0
  89. package/opencode/skills/KORTIX-logo-creator/references/prompt-patterns.md +134 -0
  90. package/opencode/skills/KORTIX-logo-creator/scripts/compose_logo.py +406 -0
  91. package/opencode/skills/KORTIX-logo-creator/scripts/create_logo_sheet.py +258 -0
  92. package/opencode/skills/KORTIX-logo-creator/scripts/remove_bg.py +96 -0
  93. package/opencode/skills/KORTIX-memory/SKILL.md +261 -0
  94. package/opencode/skills/KORTIX-memory/scripts/export-sessions.py +409 -0
  95. package/opencode/skills/KORTIX-paper-creator/SKILL.md +549 -0
  96. package/opencode/skills/KORTIX-paper-creator/assets/template.tex +101 -0
  97. package/opencode/skills/KORTIX-paper-creator/scripts/compile.sh +177 -0
  98. package/opencode/skills/KORTIX-paper-creator/scripts/openalex_to_bibtex.py +220 -0
  99. package/opencode/skills/KORTIX-paper-creator/scripts/verify.sh +354 -0
  100. package/opencode/skills/KORTIX-paper-search/SKILL.md +418 -0
  101. package/opencode/skills/KORTIX-pdf/SKILL.md +232 -0
  102. package/opencode/skills/KORTIX-pdf/forms.md +36 -0
  103. package/opencode/skills/KORTIX-pdf/reference.md +105 -0
  104. package/opencode/skills/KORTIX-pdf/scripts/check_bounding_boxes.py +65 -0
  105. package/opencode/skills/KORTIX-pdf/scripts/check_fillable_fields.py +11 -0
  106. package/opencode/skills/KORTIX-pdf/scripts/convert_pdf_to_images.py +33 -0
  107. package/opencode/skills/KORTIX-pdf/scripts/create_validation_image.py +37 -0
  108. package/opencode/skills/KORTIX-pdf/scripts/extract_form_field_info.py +122 -0
  109. package/opencode/skills/KORTIX-pdf/scripts/extract_form_structure.py +115 -0
  110. package/opencode/skills/KORTIX-pdf/scripts/fill_fillable_fields.py +98 -0
  111. package/opencode/skills/KORTIX-pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  112. package/opencode/skills/KORTIX-plan/SKILL.md +228 -0
  113. package/opencode/skills/KORTIX-presentation-viewer/SKILL.md +87 -0
  114. package/opencode/skills/KORTIX-presentation-viewer/serve.ts +136 -0
  115. package/opencode/skills/KORTIX-presentation-viewer/viewer.html +559 -0
  116. package/opencode/skills/KORTIX-presentations/SKILL.md +344 -0
  117. package/opencode/skills/KORTIX-remotion/SKILL.md +56 -0
  118. package/opencode/skills/KORTIX-remotion/rules/3d.md +86 -0
  119. package/opencode/skills/KORTIX-remotion/rules/animations.md +29 -0
  120. package/opencode/skills/KORTIX-remotion/rules/assets.md +78 -0
  121. package/opencode/skills/KORTIX-remotion/rules/audio-visualization.md +198 -0
  122. package/opencode/skills/KORTIX-remotion/rules/audio.md +169 -0
  123. package/opencode/skills/KORTIX-remotion/rules/calculate-metadata.md +104 -0
  124. package/opencode/skills/KORTIX-remotion/rules/can-decode.md +75 -0
  125. package/opencode/skills/KORTIX-remotion/rules/charts.md +120 -0
  126. package/opencode/skills/KORTIX-remotion/rules/compositions.md +141 -0
  127. package/opencode/skills/KORTIX-remotion/rules/display-captions.md +184 -0
  128. package/opencode/skills/KORTIX-remotion/rules/extract-frames.md +229 -0
  129. package/opencode/skills/KORTIX-remotion/rules/ffmpeg.md +38 -0
  130. package/opencode/skills/KORTIX-remotion/rules/fonts.md +152 -0
  131. package/opencode/skills/KORTIX-remotion/rules/get-audio-duration.md +58 -0
  132. package/opencode/skills/KORTIX-remotion/rules/get-video-dimensions.md +68 -0
  133. package/opencode/skills/KORTIX-remotion/rules/get-video-duration.md +58 -0
  134. package/opencode/skills/KORTIX-remotion/rules/gifs.md +141 -0
  135. package/opencode/skills/KORTIX-remotion/rules/images.md +130 -0
  136. package/opencode/skills/KORTIX-remotion/rules/import-srt-captions.md +69 -0
  137. package/opencode/skills/KORTIX-remotion/rules/light-leaks.md +73 -0
  138. package/opencode/skills/KORTIX-remotion/rules/lottie.md +68 -0
  139. package/opencode/skills/KORTIX-remotion/rules/maps.md +401 -0
  140. package/opencode/skills/KORTIX-remotion/rules/measuring-dom-nodes.md +35 -0
  141. package/opencode/skills/KORTIX-remotion/rules/measuring-text.md +143 -0
  142. package/opencode/skills/KORTIX-remotion/rules/parameters.md +98 -0
  143. package/opencode/skills/KORTIX-remotion/rules/sequencing.md +118 -0
  144. package/opencode/skills/KORTIX-remotion/rules/subtitles.md +36 -0
  145. package/opencode/skills/KORTIX-remotion/rules/tailwind.md +11 -0
  146. package/opencode/skills/KORTIX-remotion/rules/text-animations.md +20 -0
  147. package/opencode/skills/KORTIX-remotion/rules/timing.md +179 -0
  148. package/opencode/skills/KORTIX-remotion/rules/transcribe-captions.md +70 -0
  149. package/opencode/skills/KORTIX-remotion/rules/transitions.md +197 -0
  150. package/opencode/skills/KORTIX-remotion/rules/transparent-videos.md +106 -0
  151. package/opencode/skills/KORTIX-remotion/rules/trimming.md +53 -0
  152. package/opencode/skills/KORTIX-remotion/rules/videos.md +171 -0
  153. package/opencode/skills/KORTIX-secrets/SKILL.md +280 -0
  154. package/opencode/skills/KORTIX-semantic-search/SKILL.md +213 -0
  155. package/opencode/skills/KORTIX-session-search/SKILL.md +807 -0
  156. package/opencode/skills/KORTIX-session-search/Untitled +1 -0
  157. package/opencode/skills/KORTIX-skill-creator/SKILL.md +163 -0
  158. package/opencode/skills/KORTIX-web-research/SKILL.md +69 -0
  159. package/opencode/skills/KORTIX-xlsx/LICENSE.txt +30 -0
  160. package/opencode/skills/KORTIX-xlsx/SKILL.md +549 -0
  161. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/__init__.py +0 -0
  162. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/merge_runs.py +199 -0
  163. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  164. package/opencode/skills/KORTIX-xlsx/scripts/office/pack.py +159 -0
  165. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  166. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  167. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  168. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  169. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  170. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  171. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  172. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  173. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  174. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  175. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  176. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  177. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  178. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  179. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  180. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  181. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  182. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  183. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  184. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  185. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  186. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  187. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  188. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  189. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  190. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  191. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  192. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  193. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  194. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  195. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  196. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  197. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  198. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  199. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  200. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  201. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  202. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  203. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  204. package/opencode/skills/KORTIX-xlsx/scripts/office/soffice.py +183 -0
  205. package/opencode/skills/KORTIX-xlsx/scripts/office/unpack.py +132 -0
  206. package/opencode/skills/KORTIX-xlsx/scripts/office/validate.py +111 -0
  207. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/__init__.py +15 -0
  208. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/base.py +847 -0
  209. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/docx.py +446 -0
  210. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/pptx.py +275 -0
  211. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/redlining.py +247 -0
  212. package/opencode/skills/KORTIX-xlsx/scripts/recalc.py +184 -0
  213. package/opencode/tools/image-gen.ts +342 -0
  214. package/opencode/tools/image-search.ts +190 -0
  215. package/opencode/tools/memory-get.ts +168 -0
  216. package/opencode/tools/memory-search.ts +247 -0
  217. package/opencode/tools/presentation-gen.ts +723 -0
  218. package/opencode/tools/scrape-webpage.ts +115 -0
  219. package/opencode/tools/scripts/.python-version +1 -0
  220. package/opencode/tools/scripts/convert_pdf.py +184 -0
  221. package/opencode/tools/scripts/convert_pptx.py +562 -0
  222. package/opencode/tools/scripts/pyproject.toml +11 -0
  223. package/opencode/tools/scripts/uv.lock +287 -0
  224. package/opencode/tools/scripts/validate_slide.py +74 -0
  225. package/opencode/tools/show-user.ts +217 -0
  226. package/opencode/tools/tests/e2e-presentation-fix.ts +277 -0
  227. package/opencode/tools/tests/image-gen.test.ts +215 -0
  228. package/opencode/tools/tests/image-search.test.ts +125 -0
  229. package/opencode/tools/tests/memory-system-benchmark.ts +1076 -0
  230. package/opencode/tools/tests/presentation-gen.test.ts +389 -0
  231. package/opencode/tools/tests/scrape-webpage.test.ts +74 -0
  232. package/opencode/tools/tests/show-user.test.ts +241 -0
  233. package/opencode/tools/tests/video-gen.test.ts +110 -0
  234. package/opencode/tools/tests/web-search.test.ts +106 -0
  235. package/opencode/tools/video-gen.ts +200 -0
  236. package/opencode/tools/web-search.ts +153 -0
  237. package/opencode/tsconfig.json +29 -0
  238. package/package.json +36 -0
  239. package/patch-agent-browser.js +100 -0
  240. package/postinstall.sh +88 -0
  241. package/services/KORTIX-presentation-viewer/run +37 -0
  242. package/services/agent-browser-viewer/run +48 -0
  243. package/services/kortix-master/run +16 -0
  244. package/services/lss-sync/run +22 -0
  245. package/services/opencode-serve/run +25 -0
  246. package/services/opencode-web/run +21 -0
@@ -0,0 +1,847 @@
1
+ """
2
+ Base validator with common validation logic for document files.
3
+ """
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ import defusedxml.minidom
9
+ import lxml.etree
10
+
11
+
12
+ class BaseSchemaValidator:
13
+
14
+ IGNORED_VALIDATION_ERRORS = [
15
+ "hyphenationZone",
16
+ "purl.org/dc/terms",
17
+ ]
18
+
19
+ UNIQUE_ID_REQUIREMENTS = {
20
+ "comment": ("id", "file"),
21
+ "commentrangestart": ("id", "file"),
22
+ "commentrangeend": ("id", "file"),
23
+ "bookmarkstart": ("id", "file"),
24
+ "bookmarkend": ("id", "file"),
25
+ "sldid": ("id", "file"),
26
+ "sldmasterid": ("id", "global"),
27
+ "sldlayoutid": ("id", "global"),
28
+ "cm": ("authorid", "file"),
29
+ "sheet": ("sheetid", "file"),
30
+ "definedname": ("id", "file"),
31
+ "cxnsp": ("id", "file"),
32
+ "sp": ("id", "file"),
33
+ "pic": ("id", "file"),
34
+ "grpsp": ("id", "file"),
35
+ }
36
+
37
+ EXCLUDED_ID_CONTAINERS = {
38
+ "sectionlst",
39
+ }
40
+
41
+ ELEMENT_RELATIONSHIP_TYPES = {}
42
+
43
+ SCHEMA_MAPPINGS = {
44
+ "word": "ISO-IEC29500-4_2016/wml.xsd",
45
+ "ppt": "ISO-IEC29500-4_2016/pml.xsd",
46
+ "xl": "ISO-IEC29500-4_2016/sml.xsd",
47
+ "[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
48
+ "app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
49
+ "core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
50
+ "custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
51
+ ".rels": "ecma/fouth-edition/opc-relationships.xsd",
52
+ "people.xml": "microsoft/wml-2012.xsd",
53
+ "commentsIds.xml": "microsoft/wml-cid-2016.xsd",
54
+ "commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
55
+ "commentsExtended.xml": "microsoft/wml-2012.xsd",
56
+ "chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
57
+ "theme": "ISO-IEC29500-4_2016/dml-main.xsd",
58
+ "drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
59
+ }
60
+
61
+ MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
62
+ XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
63
+
64
+ PACKAGE_RELATIONSHIPS_NAMESPACE = (
65
+ "http://schemas.openxmlformats.org/package/2006/relationships"
66
+ )
67
+ OFFICE_RELATIONSHIPS_NAMESPACE = (
68
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
69
+ )
70
+ CONTENT_TYPES_NAMESPACE = (
71
+ "http://schemas.openxmlformats.org/package/2006/content-types"
72
+ )
73
+
74
+ MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}
75
+
76
+ OOXML_NAMESPACES = {
77
+ "http://schemas.openxmlformats.org/officeDocument/2006/math",
78
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
79
+ "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
80
+ "http://schemas.openxmlformats.org/drawingml/2006/main",
81
+ "http://schemas.openxmlformats.org/drawingml/2006/chart",
82
+ "http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
83
+ "http://schemas.openxmlformats.org/drawingml/2006/diagram",
84
+ "http://schemas.openxmlformats.org/drawingml/2006/picture",
85
+ "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
86
+ "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
87
+ "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
88
+ "http://schemas.openxmlformats.org/presentationml/2006/main",
89
+ "http://schemas.openxmlformats.org/spreadsheetml/2006/main",
90
+ "http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
91
+ "http://www.w3.org/XML/1998/namespace",
92
+ }
93
+
94
def __init__(self, unpacked_dir, original_file=None, verbose=False):
    """Record the validation target and discover its XML parts.

    Args:
        unpacked_dir: Directory holding the unpacked document; stored as a
            resolved ``Path``.
        original_file: Optional path of the original document; stored as a
            ``Path``, or ``None`` when falsy.
        verbose: Stored as ``self.verbose``; the validation methods use it
            to decide whether to print PASSED messages.
    """
    root = Path(unpacked_dir).resolve()
    self.unpacked_dir = root

    if original_file:
        self.original_file = Path(original_file)
    else:
        self.original_file = None

    self.verbose = verbose

    # Schemas live in a "schemas" directory next to this module's package.
    self.schemas_dir = Path(__file__).parent.parent / "schemas"

    # Collect every *.xml file first, then every *.rels file, recursively.
    discovered = []
    for glob_pattern in ("*.xml", "*.rels"):
        discovered.extend(root.rglob(glob_pattern))
    self.xml_files = discovered

    if len(discovered) == 0:
        print(f"Warning: No XML files found in {self.unpacked_dir}")
108
+
109
def validate(self):
    """Run this validator's checks; the base class provides no implementation.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    message = "Subclasses must implement the validate method"
    raise NotImplementedError(message)
111
+
112
def repair(self) -> int:
    """Apply the automatic repairs and return how many were made.

    The only repair performed here is the one implemented by
    ``repair_whitespace_preservation``.
    """
    repair_count = self.repair_whitespace_preservation()
    return repair_count
114
+
115
def repair_whitespace_preservation(self) -> int:
    """Add ``xml:space="preserve"`` to text elements that need it.

    For every file in ``self.xml_files``, each element whose tag name ends
    in ``:t`` is inspected. If the value of its first child node starts or
    ends with a space or tab and the element does not already carry
    ``xml:space="preserve"``, the attribute is added. Modified files are
    rewritten in place as UTF-8.

    The repair is best-effort: a file that cannot be read, parsed or
    written is skipped. Skipped files are reported only when
    ``self.verbose`` is set, so default output is unchanged.

    Returns:
        The number of elements that were repaired.
    """
    repairs = 0
    edge_whitespace = (" ", "\t")

    for xml_file in self.xml_files:
        try:
            content = xml_file.read_text(encoding="utf-8")
            dom = defusedxml.minidom.parseString(content)
            modified = False

            for elem in dom.getElementsByTagName("*"):
                if not (elem.tagName.endswith(":t") and elem.firstChild):
                    continue

                text = elem.firstChild.nodeValue
                if not text:
                    continue
                if not (
                    text.startswith(edge_whitespace)
                    or text.endswith(edge_whitespace)
                ):
                    continue
                if elem.getAttribute("xml:space") == "preserve":
                    continue

                elem.setAttribute("xml:space", "preserve")
                text_preview = (
                    repr(text[:30]) + "..." if len(text) > 30 else repr(text)
                )
                print(f" Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}")
                repairs += 1
                modified = True

            if modified:
                xml_file.write_bytes(dom.toxml(encoding="UTF-8"))

        except Exception as exc:
            # Deliberately best-effort: never abort the whole repair pass
            # because of one bad file, but do not hide the failure either.
            if self.verbose:
                print(f" Warning: {xml_file.name}: whitespace repair skipped: {exc}")

    return repairs
142
+
143
def validate_xml(self):
    """Check that every file in ``self.xml_files`` parses as XML.

    Each parse failure is collected as one message, prefixed with the file's
    path relative to ``self.unpacked_dir``. All failures are printed
    together.

    Returns:
        ``True`` when every file parsed, ``False`` otherwise.
    """
    problems = []

    for candidate in self.xml_files:
        try:
            lxml.etree.parse(str(candidate))
        except lxml.etree.XMLSyntaxError as syntax_err:
            location = candidate.relative_to(self.unpacked_dir)
            problems.append(
                f" {location}: Line {syntax_err.lineno}: {syntax_err.msg}"
            )
        except Exception as other_err:
            location = candidate.relative_to(self.unpacked_dir)
            problems.append(f" {location}: Unexpected error: {str(other_err)}")

    if not problems:
        if self.verbose:
            print("PASSED - All XML files are well-formed")
        return True

    print(f"FAILED - Found {len(problems)} XML violations:")
    for line in problems:
        print(line)
    return False
169
+
170
def validate_namespaces(self):
    """Check that prefixes listed in ``Ignorable`` attributes are declared.

    For each file, every root-element attribute whose name ends in
    ``Ignorable`` is split on whitespace, and each resulting prefix must
    appear in the root element's namespace map. Files that fail to parse
    with an XML syntax error are skipped here.

    Returns:
        ``True`` when no undeclared prefix was found, ``False`` otherwise.
    """
    problems = []

    for xml_file in self.xml_files:
        try:
            root = lxml.etree.parse(str(xml_file)).getroot()
        except lxml.etree.XMLSyntaxError:
            continue

        # The default namespace is keyed by None and is not a prefix.
        declared = set(root.nsmap.keys()) - {None}

        for attr_name, attr_val in root.attrib.items():
            if not attr_name.endswith("Ignorable"):
                continue
            undeclared = set(attr_val.split()) - declared
            for ns in undeclared:
                problems.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: "
                    f"Namespace '{ns}' in Ignorable but not declared"
                )

    if problems:
        print(f"FAILED - {len(problems)} namespace issues:")
        for line in problems:
            print(line)
        return False

    if self.verbose:
        print("PASSED - All namespace prefixes properly declared")
    return True
198
+
199
def validate_unique_ids(self):
    """Check the ID-uniqueness rules in ``UNIQUE_ID_REQUIREMENTS``.

    Each rule maps a lower-cased local element name to
    ``(attribute_name, scope)``. With scope ``"file"`` the attribute value
    must be unique per (element name, attribute) within one file; with scope
    ``"global"`` it must be unique across all files in ``self.xml_files``.

    ``mc:AlternateContent`` subtrees are removed from the parsed tree before
    checking, and elements nested inside any container named in
    ``EXCLUDED_ID_CONTAINERS`` are ignored.

    Returns:
        ``True`` when no violation (and no processing error) was recorded,
        ``False`` otherwise.
    """

    def local_name(qualified):
        # Drop a "{namespace}" prefix if present and normalise case.
        return qualified.split("}")[-1].lower()

    errors = []
    global_ids = {}

    for xml_file in self.xml_files:
        try:
            root = lxml.etree.parse(str(xml_file)).getroot()
            file_ids = {}

            alternate_content = root.xpath(
                ".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
            )
            for alt in alternate_content:
                alt.getparent().remove(alt)

            for elem in root.iter():
                # iter() also yields comments and processing instructions,
                # whose .tag is a factory function rather than a string.
                # Skip them instead of failing the whole file.
                if not isinstance(elem.tag, str):
                    continue

                tag = local_name(elem.tag)
                if tag not in self.UNIQUE_ID_REQUIREMENTS:
                    continue

                in_excluded_container = any(
                    local_name(ancestor.tag) in self.EXCLUDED_ID_CONTAINERS
                    for ancestor in elem.iterancestors()
                )
                if in_excluded_container:
                    continue

                attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]

                # First attribute whose local name matches, ignoring
                # namespace and case.
                id_value = next(
                    (
                        value
                        for attr, value in elem.attrib.items()
                        if local_name(attr) == attr_name
                    ),
                    None,
                )
                if id_value is None:
                    continue

                if scope == "global":
                    if id_value in global_ids:
                        prev_file, prev_line, prev_tag = global_ids[id_value]
                        errors.append(
                            f" {xml_file.relative_to(self.unpacked_dir)}: "
                            f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
                            f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
                        )
                    else:
                        global_ids[id_value] = (
                            xml_file.relative_to(self.unpacked_dir),
                            elem.sourceline,
                            tag,
                        )
                elif scope == "file":
                    seen = file_ids.setdefault((tag, attr_name), {})
                    if id_value in seen:
                        prev_line = seen[id_value]
                        errors.append(
                            f" {xml_file.relative_to(self.unpacked_dir)}: "
                            f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
                            f"(first occurrence at line {prev_line})"
                        )
                    else:
                        seen[id_value] = elem.sourceline

        except Exception as e:
            # XMLSyntaxError is an Exception subclass, so one clause covers
            # both parse failures and anything unexpected.
            errors.append(
                f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
            )

    if errors:
        print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
        for error in errors:
            print(error)
        return False

    if self.verbose:
        print("PASSED - All required IDs are unique")
    return True
288
+
289
def validate_file_references(self):
    """Cross-check ``.rels`` targets against the files in the package.

    Two kinds of problem are reported:

    * a relationship whose ``Target`` does not resolve to an existing file
      ("Broken reference"), and
    * a file under ``self.unpacked_dir`` that no relationship points to
      ("Unreferenced file"). ``[Content_Types].xml`` and ``.rels`` files
      themselves are exempt.

    Targets starting with ``http`` or ``mailto:`` are not checked, and
    neither are relationships marked ``TargetMode="External"``: those point
    outside the package, so resolving them as package paths would produce
    false "broken reference" errors.

    Returns:
        ``True`` when no problem was found (or there are no ``.rels``
        files), ``False`` otherwise.
    """
    errors = []

    rels_files = list(self.unpacked_dir.rglob("*.rels"))

    if not rels_files:
        if self.verbose:
            print("PASSED - No .rels files found")
        return True

    all_files = [
        file_path.resolve()
        for file_path in self.unpacked_dir.rglob("*")
        if file_path.is_file()
        and file_path.name != "[Content_Types].xml"
        and not file_path.name.endswith(".rels")
    ]

    all_referenced_files = set()

    if self.verbose:
        print(
            f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
        )

    for rels_file in rels_files:
        try:
            rels_root = lxml.etree.parse(str(rels_file)).getroot()
            rels_dir = rels_file.parent
            broken_refs = []

            for rel in rels_root.findall(
                ".//ns:Relationship",
                namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
            ):
                target = rel.get("Target")
                if not target or target.startswith(("http", "mailto:")):
                    continue
                # External relationships reference resources outside the
                # package (e.g. file:// or ftp:// links); nothing to resolve.
                if rel.get("TargetMode") == "External":
                    continue

                if target.startswith("/"):
                    # Absolute target: relative to the package root.
                    target_path = self.unpacked_dir / target.lstrip("/")
                elif rels_file.name == ".rels":
                    target_path = self.unpacked_dir / target
                else:
                    # Part-level .rels: targets are relative to the folder
                    # that contains the _rels directory.
                    target_path = rels_dir.parent / target

                try:
                    target_path = target_path.resolve()
                    if target_path.exists() and target_path.is_file():
                        all_referenced_files.add(target_path)
                    else:
                        broken_refs.append((target, rel.sourceline))
                except (OSError, ValueError):
                    broken_refs.append((target, rel.sourceline))

            if broken_refs:
                rel_path = rels_file.relative_to(self.unpacked_dir)
                for broken_ref, line_num in broken_refs:
                    errors.append(
                        f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
                    )

        except Exception as e:
            rel_path = rels_file.relative_to(self.unpacked_dir)
            errors.append(f" Error parsing {rel_path}: {e}")

    unreferenced_files = set(all_files) - all_referenced_files

    for unref_file in sorted(unreferenced_files):
        unref_rel_path = unref_file.relative_to(self.unpacked_dir)
        errors.append(f" Unreferenced file: {unref_rel_path}")

    if errors:
        print(f"FAILED - Found {len(errors)} relationship validation errors:")
        for error in errors:
            print(error)
        print(
            "CRITICAL: These errors will cause the document to appear corrupt. "
            + "Broken references MUST be fixed, "
            + "and unreferenced files MUST be referenced or removed."
        )
        return False

    if self.verbose:
        print(
            "PASSED - All references are valid and all files are properly referenced"
        )
    return True
384
+
385
def validate_all_relationship_ids(self):
    """Check that ``r:id`` / ``r:embed`` / ``r:link`` values resolve.

    For every non-``.rels`` file in ``self.xml_files`` that has a sibling
    ``_rels/<name>.rels`` file, this:

    * reports relationship IDs that occur more than once in the ``.rels``
      file,
    * reports attributes in the office-relationships namespace named
      ``id``, ``embed`` or ``link`` whose value is not an ID defined in the
      ``.rels`` file, and
    * when ``ELEMENT_RELATIONSHIP_TYPES`` is non-empty, reports ``r:id``
      references whose relationship type does not contain the type expected
      for the referencing element.

    Files without a matching ``.rels`` file are not checked.

    Returns:
        ``True`` when no problem was found, ``False`` otherwise.
    """
    errors = []

    # These lookups do not depend on the file or element, so build them once.
    r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE
    rid_attrs_to_check = [
        (attr_name, f"{{{r_ns}}}{attr_name}")
        for attr_name in ("id", "embed", "link")
    ]
    relationship_path = (
        f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
    )

    for xml_file in self.xml_files:
        if xml_file.suffix == ".rels":
            continue

        rels_file = xml_file.parent / "_rels" / f"{xml_file.name}.rels"
        if not rels_file.exists():
            continue

        try:
            rels_root = lxml.etree.parse(str(rels_file)).getroot()
            rid_to_type = {}

            for rel in rels_root.findall(relationship_path):
                rid = rel.get("Id")
                if not rid:
                    continue
                if rid in rid_to_type:
                    rels_rel_path = rels_file.relative_to(self.unpacked_dir)
                    errors.append(
                        f" {rels_rel_path}: Line {rel.sourceline}: "
                        f"Duplicate relationship ID '{rid}' (IDs must be unique)"
                    )
                # Keep only the last path segment of the type URI.
                rid_to_type[rid] = rel.get("Type", "").split("/")[-1]

            xml_root = lxml.etree.parse(str(xml_file)).getroot()

            for elem in xml_root.iter():
                for attr_name, qualified_attr in rid_attrs_to_check:
                    rid_attr = elem.get(qualified_attr)
                    if not rid_attr:
                        continue

                    xml_rel_path = xml_file.relative_to(self.unpacked_dir)
                    elem_name = elem.tag.split("}")[-1]

                    if rid_attr not in rid_to_type:
                        # Show at most five known IDs to keep the message short.
                        known_ids = ", ".join(sorted(rid_to_type.keys())[:5])
                        ellipsis = "..." if len(rid_to_type) > 5 else ""
                        errors.append(
                            f" {xml_rel_path}: Line {elem.sourceline}: "
                            f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' "
                            f"(valid IDs: {known_ids}{ellipsis})"
                        )
                    elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES:
                        expected_type = self._get_expected_relationship_type(
                            elem_name
                        )
                        if expected_type:
                            actual_type = rid_to_type[rid_attr]
                            if expected_type not in actual_type.lower():
                                errors.append(
                                    f" {xml_rel_path}: Line {elem.sourceline}: "
                                    f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' "
                                    f"but should point to a '{expected_type}' relationship"
                                )

        except Exception as e:
            xml_rel_path = xml_file.relative_to(self.unpacked_dir)
            errors.append(f" Error processing {xml_rel_path}: {e}")

    if errors:
        print(f"FAILED - Found {len(errors)} relationship ID reference errors:")
        for error in errors:
            print(error)
        print("\nThese ID mismatches will cause the document to appear corrupt!")
        return False

    if self.verbose:
        print("PASSED - All relationship ID references are valid")
    return True
468
+
469
def _get_expected_relationship_type(self, element_name):
    """Return the relationship type expected for *element_name*, or None.

    Lookup order (all comparisons use the lower-cased element name):

    1. an explicit entry in ``self.ELEMENT_RELATIONSHIP_TYPES``;
    2. names ending in ``id`` (longer than 2 chars): the name without the
       suffix, with the special case ``sld`` -> ``slide``;
    3. names ending in ``reference`` (longer than 9 chars): the name
       without the suffix.
    """
    lowered = element_name.lower()

    explicit = self.ELEMENT_RELATIONSHIP_TYPES
    if lowered in explicit:
        return explicit[lowered]

    if len(lowered) > 2 and lowered.endswith("id"):
        stem = lowered[:-2]
        return "slide" if stem == "sld" else stem.lower()

    if len(lowered) > 9 and lowered.endswith("reference"):
        return lowered[:-9].lower()

    return None
491
+
492
def validate_content_types(self):
    """Check that content files are declared in ``[Content_Types].xml``.

    Two checks are performed:

    * every file in ``self.xml_files`` whose root element is one of a fixed
      set of names (slide, layout, master, presentation, document,
      workbook, worksheet, theme) must have a matching ``Override``
      ``PartName``;
    * every non-XML file with a known media extension must have a matching
      ``Default`` ``Extension`` entry.

    Files under ``_rels`` / ``docProps`` and the content-types file itself
    are not checked. XML files that fail to parse are skipped silently here.

    Returns:
        True when nothing is missing, False otherwise (including when
        ``[Content_Types].xml`` does not exist). Findings are printed.
    """
    findings = []

    manifest = self.unpacked_dir / "[Content_Types].xml"
    if not manifest.exists():
        print("FAILED - [Content_Types].xml file not found")
        return False

    try:
        manifest_root = lxml.etree.parse(str(manifest)).getroot()
        ns = self.CONTENT_TYPES_NAMESPACE

        overrides = (
            node.get("PartName")
            for node in manifest_root.findall(f".//{{{ns}}}Override")
        )
        declared_parts = {
            name.lstrip("/") for name in overrides if name is not None
        }

        defaults = (
            node.get("Extension")
            for node in manifest_root.findall(f".//{{{ns}}}Default")
        )
        declared_extensions = {
            ext.lower() for ext in defaults if ext is not None
        }

        declarable_roots = {
            "sld",
            "sldLayout",
            "sldMaster",
            "presentation",
            "document",
            "workbook",
            "worksheet",
            "theme",
        }

        media_extensions = {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "gif": "image/gif",
            "bmp": "image/bmp",
            "tiff": "image/tiff",
            "wmf": "image/x-wmf",
            "emf": "image/x-emf",
        }

        on_disk = [
            entry for entry in list(self.unpacked_dir.rglob("*")) if entry.is_file()
        ]

        skip_markers = (".rels", "[Content_Types]", "docProps/", "_rels/")
        for part in self.xml_files:
            # Normalise separators so paths compare equal to PartName values.
            part_path = str(part.relative_to(self.unpacked_dir)).replace("\\", "/")
            if any(marker in part_path for marker in skip_markers):
                continue

            try:
                tag = lxml.etree.parse(str(part)).getroot().tag
                local_name = tag.rsplit("}", 1)[-1]
                if local_name in declarable_roots and part_path not in declared_parts:
                    findings.append(
                        f" {part_path}: File with <{local_name}> root not declared in [Content_Types].xml"
                    )
            except Exception:
                # Unparseable parts are skipped by this check.
                continue

        for entry in on_disk:
            if entry.suffix.lower() in {".xml", ".rels"}:
                continue
            if entry.name == "[Content_Types].xml":
                continue
            if "_rels" in entry.parts or "docProps" in entry.parts:
                continue

            ext = entry.suffix.lstrip(".").lower()
            if not ext or ext in declared_extensions:
                continue
            if ext not in media_extensions:
                continue

            shown_path = entry.relative_to(self.unpacked_dir)
            mime = media_extensions[ext]
            findings.append(
                f" {shown_path}: File with extension '{ext}' not declared in [Content_Types].xml - should add: <Default Extension=\"{ext}\" ContentType=\"{mime}\"/>"
            )

    except Exception as exc:
        findings.append(f" Error parsing [Content_Types].xml: {exc}")

    if not findings:
        if self.verbose:
            print(
                "PASSED - All content files are properly declared in [Content_Types].xml"
            )
        return True

    print(f"FAILED - Found {len(findings)} content type declaration errors:")
    for finding in findings:
        print(finding)
    return False
597
+
598
def validate_file_against_xsd(self, xml_file, verbose=False):
    """Validate one XML file and report only errors not in the original.

    The file is validated via ``self._validate_single_file_xsd``. If it
    fails, the errors reported for the same file in the original package
    (``self._get_original_file_errors``) are subtracted, and any message
    containing one of ``self.IGNORED_VALIDATION_ERRORS`` is dropped.

    Args:
        xml_file: Path (or path-like) of the file inside the unpacked dir.
        verbose: When true, print a PASSED/FAILED summary for this file.

    Returns:
        A ``(status, new_errors)`` tuple: ``(None, set())`` when no schema
        applies, ``(True, set())`` when there are no new errors, and
        ``(False, new_errors)`` otherwise.
    """
    xml_file = Path(xml_file).resolve()
    unpacked_dir = self.unpacked_dir.resolve()

    is_valid, current_errors = self._validate_single_file_xsd(
        xml_file, unpacked_dir
    )

    if is_valid is None:
        return None, set()
    if is_valid:
        return True, set()

    original_errors = self._get_original_file_errors(xml_file)

    # A failed validation always carries an error set; this narrows the type.
    assert current_errors is not None
    new_errors = {
        message
        for message in current_errors - original_errors
        if not any(pattern in message for pattern in self.IGNORED_VALIDATION_ERRORS)
    }

    if not new_errors:
        if verbose:
            print(
                f"PASSED - No new errors (original had {len(current_errors)} errors)"
            )
        return True, set()

    if verbose:
        relative_path = xml_file.relative_to(unpacked_dir)
        print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)")
        # Sort before sampling: set iteration order of strings is not
        # stable across runs, which made the printed sample arbitrary.
        for error in sorted(new_errors)[:3]:
            truncated = error[:250] + "..." if len(error) > 250 else error
            print(f" - {truncated}")
    return False, new_errors
635
+
636
def validate_against_xsd(self):
    """Validate every file in ``self.xml_files`` and summarise new errors.

    Each file is checked with ``self.validate_file_against_xsd``. Files for
    which no schema applies are counted as skipped; files that fail
    contribute a header line plus up to three sample messages.

    Returns:
        True when no file has new validation errors, False otherwise.
        The per-category summary and the PASSED line are printed only when
        ``self.verbose`` is truthy; failures are always printed.
    """
    new_errors = []
    failed_file_count = 0
    original_error_count = 0
    valid_count = 0
    skipped_count = 0

    for xml_file in self.xml_files:
        relative_path = str(xml_file.relative_to(self.unpacked_dir))
        is_valid, new_file_errors = self.validate_file_against_xsd(
            xml_file, verbose=False
        )

        if is_valid is None:
            skipped_count += 1
            continue
        if is_valid:
            # A valid result that still carries errors means they were
            # pre-existing and therefore ignored.
            if new_file_errors:
                original_error_count += 1
            valid_count += 1
            continue

        failed_file_count += 1
        new_errors.append(f" {relative_path}: {len(new_file_errors)} new error(s)")
        # Sorted so the three-message sample is the same on every run.
        for error in sorted(new_file_errors)[:3]:
            new_errors.append(
                f" - {error[:250]}..." if len(error) > 250 else f" - {error}"
            )

    if self.verbose:
        print(f"Validated {len(self.xml_files)} files:")
        print(f" - Valid: {valid_count}")
        print(f" - Skipped (no schema): {skipped_count}")
        if original_error_count:
            print(f" - With original errors (ignored): {original_error_count}")
        # Counted explicitly rather than inferred from message prefixes,
        # which could not tell file headers from sample lines.
        print(f" - With NEW errors: {failed_file_count}")

    if new_errors:
        print("\nFAILED - Found NEW validation errors:")
        for error in new_errors:
            print(error)
        return False

    if self.verbose:
        print("\nPASSED - No new XSD validation errors introduced")
    return True
684
+
685
def _get_schema_path(self, xml_file):
    """Return the schema path to validate *xml_file* against, or None.

    Resolution order: exact file name in ``self.SCHEMA_MAPPINGS``; the
    ``.rels`` suffix; chart files under a ``charts/`` directory; theme
    files under a ``theme/`` directory; files whose parent directory name
    is in ``self.MAIN_CONTENT_FOLDERS``. Returns None when nothing matches.
    """
    mappings = self.SCHEMA_MAPPINGS
    file_name = xml_file.name

    if file_name in mappings:
        return self.schemas_dir / mappings[file_name]

    if xml_file.suffix == ".rels":
        return self.schemas_dir / mappings[".rels"]

    # Compare against the forward-slash form so the directory checks also
    # match on platforms whose native separator is a backslash.
    posix_path = xml_file.as_posix()

    if "charts/" in posix_path and file_name.startswith("chart"):
        return self.schemas_dir / mappings["chart"]

    if "theme/" in posix_path and file_name.startswith("theme"):
        return self.schemas_dir / mappings["theme"]

    parent_name = xml_file.parent.name
    if parent_name in self.MAIN_CONTENT_FOLDERS:
        return self.schemas_dir / mappings[parent_name]

    return None
702
+
703
def _clean_ignorable_namespaces(self, xml_doc):
    """Return a copy of *xml_doc* without foreign-namespace content.

    The document is serialised and re-parsed so the input is not modified.
    On the copy, every namespaced attribute whose namespace is not in
    ``self.OOXML_NAMESPACES`` is deleted, then foreign-namespace elements
    are removed via ``self._remove_ignorable_elements``.

    Returns:
        A new ``lxml.etree.ElementTree`` wrapping the cleaned root.
    """
    serialized = lxml.etree.tostring(xml_doc, encoding="unicode")
    cleaned_root = lxml.etree.fromstring(serialized)

    for node in cleaned_root.iter():
        # Collect first, delete afterwards: the attribute mapping must not
        # change size while it is being iterated.
        foreign_attrs = [
            name
            for name in node.attrib
            if "{" in name
            and name.split("}")[0][1:] not in self.OOXML_NAMESPACES
        ]
        for name in foreign_attrs:
            del node.attrib[name]

    self._remove_ignorable_elements(cleaned_root)

    return lxml.etree.ElementTree(cleaned_root)
722
+
723
def _remove_ignorable_elements(self, root):
    """Recursively drop child elements in non-OOXML namespaces, in place.

    A child is removed (with its whole subtree) when its tag is namespaced
    and that namespace is not in ``self.OOXML_NAMESPACES``. Children whose
    ``tag`` is missing or callable are left untouched and not descended
    into. All other children are processed recursively.
    """
    doomed = []

    for child in list(root):
        if hasattr(child, "tag") and not callable(child.tag):
            name = str(child.tag)
            is_foreign = (
                name.startswith("{")
                and name.split("}")[0][1:] not in self.OOXML_NAMESPACES
            )
            if is_foreign:
                doomed.append(child)
            else:
                self._remove_ignorable_elements(child)

    # Removal is deferred until the scan of this level is complete.
    for child in doomed:
        root.remove(child)
741
+
742
def _preprocess_for_mc_ignorable(self, xml_doc):
    """Delete the ``Ignorable`` attribute (in ``self.MC_NAMESPACE``) from the root.

    The document is modified in place and the same object is returned.
    """
    ignorable_key = f"{{{self.MC_NAMESPACE}}}Ignorable"
    root_attrs = xml_doc.getroot().attrib

    if ignorable_key in root_attrs:
        del root_attrs[ignorable_key]

    return xml_doc
749
+
750
def _validate_single_file_xsd(self, xml_file, base_path):
    """Validate *xml_file* against the schema chosen by ``_get_schema_path``.

    Before validation the document has template tags stripped from
    non-text nodes and the root ``Ignorable`` attribute removed; when the
    file lives under one of ``self.MAIN_CONTENT_FOLDERS`` (relative to
    *base_path*), foreign-namespace attributes and elements are removed too.

    Args:
        xml_file: Path of the file to validate.
        base_path: Directory that *xml_file* is located under.

    Returns:
        ``(None, None)`` when no schema applies, ``(True, set())`` when the
        document validates, and ``(False, messages)`` otherwise. Any
        exception raised while loading or validating is reported as a
        single-message failure rather than propagated.
    """
    schema_path = self._get_schema_path(xml_file)
    if not schema_path:
        return None, None

    try:
        with open(schema_path, "rb") as xsd_file:
            parser = lxml.etree.XMLParser()
            xsd_doc = lxml.etree.parse(
                xsd_file, parser=parser, base_url=str(schema_path)
            )
            schema = lxml.etree.XMLSchema(xsd_doc)

        # Binary mode lets the XML parser honour the document's own
        # encoding declaration instead of decoding with the locale default.
        with open(xml_file, "rb") as f:
            xml_doc = lxml.etree.parse(f)

        xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
        xml_doc = self._preprocess_for_mc_ignorable(xml_doc)

        relative_path = xml_file.relative_to(base_path)
        if (
            relative_path.parts
            and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
        ):
            xml_doc = self._clean_ignorable_namespaces(xml_doc)

        if schema.validate(xml_doc):
            return True, set()

        return False, {error.message for error in schema.error_log}

    except Exception as e:
        return False, {str(e)}
786
+
787
def _get_original_file_errors(self, xml_file):
    """Return the validation errors the original package has for *xml_file*.

    The counterpart of *xml_file* (same path relative to
    ``self.unpacked_dir``) is extracted from the ``self.original_file``
    archive into a temporary directory and validated with
    ``self._validate_single_file_xsd``.

    Returns:
        The set of error messages for the original file, or an empty set
        when there is no original archive, the archive has no such member,
        or validation produced no errors.
    """
    if self.original_file is None:
        return set()

    import tempfile
    import zipfile

    xml_file = Path(xml_file).resolve()
    unpacked_dir = self.unpacked_dir.resolve()
    relative_path = xml_file.relative_to(unpacked_dir)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)

        # Only the one member under test is needed; unpacking the whole
        # archive for every failing file is wasted work. Archive member
        # names always use forward slashes.
        with zipfile.ZipFile(self.original_file, "r") as zip_ref:
            try:
                zip_ref.extract(relative_path.as_posix(), temp_path)
            except KeyError:
                # The file did not exist in the original package.
                return set()

        original_xml_file = temp_path / relative_path
        if not original_xml_file.exists():
            return set()

        _, errors = self._validate_single_file_xsd(original_xml_file, temp_path)
        return errors if errors else set()
813
+
814
def _remove_template_tags_from_text_nodes(self, xml_doc):
    """Strip ``{{...}}`` template tags from a copy of *xml_doc*.

    The document is serialised and re-parsed so the input is not modified.
    For every element except those whose tag is ``t`` (with or without a
    namespace), template tags are removed from both ``text`` and ``tail``.
    Nodes whose ``tag`` is missing or callable are skipped.

    Returns:
        ``(tree, warnings)`` where *tree* is a new ``lxml.etree.ElementTree``
        and *warnings* holds one message per template tag that was found.
    """
    warnings = []
    template_pattern = re.compile(r"\{\{[^}]*\}\}")

    serialized = lxml.etree.tostring(xml_doc, encoding="unicode")
    working_root = lxml.etree.fromstring(serialized)

    def strip_tags(text, content_type):
        # Empty or missing text is handed back unchanged.
        if not text:
            return text
        found = [hit.group() for hit in template_pattern.finditer(text)]
        if not found:
            return text
        warnings.extend(
            f"Found template tag in {content_type}: {tag}" for tag in found
        )
        return template_pattern.sub("", text)

    for node in working_root.iter():
        if not hasattr(node, "tag") or callable(node.tag):
            continue

        tag_name = str(node.tag)
        if tag_name == "t" or tag_name.endswith("}t"):
            continue

        node.text = strip_tags(node.text, "text content")
        node.tail = strip_tags(node.tail, "tail content")

    return lxml.etree.ElementTree(working_root), warnings
844
+
845
+
846
# This file only provides definitions for import; executing it as a script
# is treated as a usage error.
if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")